Beispiel #1
0
 def parse_info(self, response):
     """Build a ``CellItem`` from an article page and yield it.

     Title, ISSN, DOI and authors are read from ``citation_*`` meta tags.
     ``if_2017`` and ``source`` are looked up from the ISSN; when the ISSN
     is not in the table, those two keys are left unset.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     # ISSN -> (if_2017 value, journal name).
     journals = {
         "0092-8674": (31.398, "Cell"),
         "1931-3128": (17.872, "Cell host & microbe"),
         "1097-2765": (14.248, "Molecular cell"),
         "1074-7613": (19.734, "Immunity"),
         "1535-6108": (22.844, "Cancer cell"),
         "1550-4131": (20.565, "Cell metabolism"),
         "1471-4906": (14.188, "Trends in immunology"),
         "1471-4914": (11.021, "Trends in molecular medicine"),
         "0896-6273": (14.318, "Neuron"),
         "1934-5909": (23.29, "Cell stem cell"),
     }

     item = CellItem()
     item['title'] = strip_tag(
         response.xpath(
             '//meta[@name="citation_title"]/@content').extract_first())
     item['link'] = response.meta['link']

     # get() returns None for a missing tag (indexing extract()[0] would
     # raise IndexError), so the "" fallback can actually take effect.
     issn = response.xpath(
         '//meta[@name="citation_issn"]/@content').get() or ""
     item['issn'] = issn
     if issn in journals:
         item['if_2017'], item['source'] = journals[issn]

     item['pub_date'] = tranfrom_date(
         response.xpath('//meta[contains(@name, "date")]/@content').get())
     # Prefer the abstract paragraphs in the page body; fall back to the
     # citation_abstract meta tag, then to an empty string.
     item['abstract'] = (
         handler_abstract(
             response.xpath(
                 '//div[@id="article"]//div[@class="content"]//p/text()'
             ).extract())
         or strip_tag(
             response.xpath(
                 '//meta[@name="citation_abstract"]/@content').get())
         or ""
     )
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]/@content').extract()
     item["is_pubmed"] = 0
     yield item
Beispiel #2
0
 def parse_info(self, response):
     """Build a ``CellItem`` for an eLife article page and yield it.

     Title, date, DOI and authors come from ``dc.*`` meta tags; the abstract
     comes from the ``<section id="abstract">`` paragraphs.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="dc.title"]/@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "eLife"
     item['pub_date'] = response.xpath(
         '//meta[@name="dc.date"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath('//section[@id="abstract"]//p//text()').extract())

     # The first four characters of the identifier are dropped (presumably
     # a "doi:" prefix -- confirm against a live page). A missing tag gives
     # None instead of a TypeError from slicing None.
     identifier = response.xpath(
         '//meta[@name="dc.identifier"]/@content').get()
     item['doi'] = identifier[4:] if identifier is not None else None

     item['authors'] = response.xpath(
         '//meta[@name="dc.contributor"]/@content').extract()
     item['if_2017'] = 7.616
     # Use the ISSN found on the page, else the hard-coded default.
     item['issn'] = response.xpath(
         '//hypothesis-highlight//text()').get() or '2050-084X'
     item["is_pubmed"] = 0
     yield item
Beispiel #3
0
 def parse_info(self, response):
     """Scrape one article page into a ``CellItem`` and yield it.

     Title, date, DOI and authors are read from ``dc.*`` meta tags. The
     abstract is taken from the ``hlFld-Abstract`` div, falling back to the
     ``abstract`` div when the first yields nothing truthy. Source name,
     ``if_2017`` and ISSN are fixed values.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     select = response.xpath

     item['title'] = select(
         '//meta[@name="dc.Title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = (
         'American journal of respiratory and critical care medicine')
     item['pub_date'] = select('//meta[@name="dc.Date"]/@content').get()

     # Only query the second location when the first gives no abstract.
     abstract = strip_tag(
         select('//div[@class="hlFld-Abstract"]//p//text() ').extract())
     if not abstract:
         abstract = strip_tag(
             select('//div[@class="abstract"]//p//text()').extract())
     item['abstract'] = abstract

     item['doi'] = select('//meta[@name="dc.Identifier"]/@content').get()
     item['authors'] = select(
         '//meta[@name="dc.Creator"]//@content').extract()
     item['if_2017'] = 15.239
     item['issn'] = '1535-4970'
     item["is_pubmed"] = 0
     yield item
Beispiel #4
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a Gastroenterology article and yield it.

     Fields are read from ``citation_*`` meta tags. The publication date
     prefers ``citation_online_date`` over ``citation_date``; the abstract
     prefers the ``citation_abstract`` meta tag over the ``content`` div.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]/@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "Gastroenterology"
     item['pub_date'] = (
         tranfrom_date1(
             response.xpath(
                 '//meta[@name="citation_online_date"]/@content').get())
         or tranfrom_date1(
             response.xpath(
                 '//meta[@name="citation_date"]/@content').get())
     )
     item['abstract'] = (
         strip_tag(
             response.xpath(
                 '//meta[@name="citation_abstract"]/@content').extract())
         or strip_tag(
             response.xpath('//div[@class="content"]//text()').extract())
     )
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]/@content').extract()
     item['if_2017'] = 20.773
     # get() yields the first ISSN, or None when the tag is absent, rather
     # than raising IndexError and losing the whole item.
     item['issn'] = response.xpath(
         '//meta[@name="citation_issn"]/@content').get()
     item["is_pubmed"] = 0
     yield item
Beispiel #5
0
 def parse_info(self, response):
     """Scrape one article page into a ``CellItem`` and yield it.

     Title, DOI and authors are read from ``citation_*`` meta tags; the
     publication date from the ``epub-date`` span. The abstract is taken
     from direct ``<p>`` children of ``article-section__content`` divs,
     falling back to the ``article-section__content en main`` div inside a
     ``<section>``. Source name, ``if_2017`` and ISSN are fixed values.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     select = response.xpath

     item['title'] = select(
         '//meta[@name="citation_title"]/@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "CA: a cancer journal for clinicians"

     epub_date = select('//div//span[@class="epub-date"]//text()').get()
     item['pub_date'] = tranfrom_date(epub_date)

     # Only query the second location when the first gives no abstract.
     abstract = strip_tag(
         select('//div[@class="article-section__content"]/p/text()')
         .extract())
     if not abstract:
         abstract = strip_tag(
             select(
                 '//section//div[@class="article-section__content en main"]//p//text()'
             ).extract())
     item['abstract'] = abstract

     item['doi'] = select('//meta[@name="citation_doi"]/@content').get()
     item['authors'] = select(
         '//meta[@name="citation_author"]/@content').extract()
     item['if_2017'] = 244.585
     # The print ISSN ("issnPrint") is 0007-9235.
     item['issn'] = "1542-4863"
     item["is_pubmed"] = 0
     yield item
Beispiel #6
0
 def parse_info(self, response):
     """Build a ``CellItem`` for an American Journal of Psychiatry article.

     Title, date, DOI and authors are read from ``dc.*`` meta tags; the
     abstract comes from the ``abstractSection abstractInFull`` div.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="dc.Title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = 'The American journal of psychiatry'
     item['pub_date'] = response.xpath(
         '//meta[@name="dc.Date"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath(
             '//div[@class="abstractSection abstractInFull"]//p//text()'
         ).extract())

     # The DOI is taken from the second dc.Identifier tag. With fewer than
     # two tags it is stored as None instead of raising IndexError.
     identifiers = response.xpath(
         '//meta[@name="dc.Identifier"]/@content').extract()
     item['doi'] = identifiers[1] if len(identifiers) > 1 else None

     item['authors'] = response.xpath(
         '//meta[@name="dc.Creator"]//@content').extract()
     item['if_2017'] = 13.391
     item['issn'] = '1535-7228'
     item["is_pubmed"] = 0
     yield item
Beispiel #7
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a European Respiratory Journal article.

     Title, DOI, authors and ISSN are read from ``citation_*`` meta tags,
     the date from ``article:published_time`` and the abstract from
     ``DC.Description``.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "The European respiratory journal"
     item['pub_date'] = response.xpath(
         '//meta[@name="article:published_time"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath(
             '//meta[@name="DC.Description"]/@content').extract())
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]//@content').extract()
     item['if_2017'] = 12.242

     # The second citation_issn tag is the one stored. With fewer than two
     # tags the ISSN is None instead of raising IndexError.
     issns = response.xpath(
         '//meta[@name="citation_issn"]/@content').extract()
     item['issn'] = issns[1] if len(issns) > 1 else None

     item["is_pubmed"] = 0
     yield item
Beispiel #8
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a Blood article page and yield it.

     Title, abstract, DOI, authors and ISSN are read from ``citation_*``
     meta tags; the date comes from ``DC.Date``.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "Blood"
     item['pub_date'] = response.xpath(
         '//meta[@name="DC.Date"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath(
             '//meta[@name="citation_abstract"]/@content').extract())
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]//@content').extract()
     item['if_2017'] = 15.132
     # get() yields the first ISSN, or None when the tag is absent, rather
     # than raising IndexError and losing the whole item.
     item['issn'] = response.xpath(
         '//meta[@name="citation_issn"]/@content').get()
     item["is_pubmed"] = 0
     yield item
Beispiel #9
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a JACC article page and yield it.

     The title and ISSN are read from ``citation_*`` meta tags; date,
     abstract, DOI and authors come from ``DC.*`` meta tags.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "Journal of the American College of Cardiology"
     item['pub_date'] = response.xpath(
         '//meta[@name="DC.Date"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath(
             '//meta[@name="DC.Description"]/@content').extract())
     item['doi'] = response.xpath(
         '//meta[@name="DC.Identifier"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="DC.Contributor"]//@content').extract()
     item['if_2017'] = 16.834

     # The second citation_issn tag is the one stored. With fewer than two
     # tags the ISSN is None instead of raising IndexError.
     issns = response.xpath(
         '//meta[@name="citation_issn"]/@content').extract()
     item['issn'] = issns[1] if len(issns) > 1 else None

     item["is_pubmed"] = 0
     yield item
Beispiel #10
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a Journal of Experimental Medicine article.

     Title, authors and ISSN are read from ``citation_*`` meta tags, date
     and DOI from ``DC.*`` meta tags, and the abstract from the
     ``og:description`` meta tag.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]//@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "The Journal of experimental medicine"
     item['pub_date'] = response.xpath(
         '//meta[@name="DC.Date"]/@content').get()
     item['abstract'] = strip_tag(
         response.xpath(
             '//meta[@name="og:description"]/@content').extract())
     item['doi'] = response.xpath(
         '//meta[@name="DC.Identifier"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]//@content').extract()
     item['if_2017'] = 10.79

     # The second citation_issn tag is the one stored. With fewer than two
     # tags the ISSN is None instead of raising IndexError.
     issns = response.xpath(
         '//meta[@name="citation_issn"]/@content').extract()
     item['issn'] = issns[1] if len(issns) > 1 else None

     item["is_pubmed"] = 0
     yield item
Beispiel #11
0
 def parse_info(self, response):
     """Scrape one article page into a ``CellItem`` and yield it.

     Title, abstract and authors are collected from text nodes in the page
     body. Link, publication date and DOI are taken from ``response.meta``;
     the DOI is ``meta['detail_url']`` with its first ten characters
     removed. Source name, ``if_2017`` and ISSN are fixed values.

     Args:
         response: Response for the article page; its ``meta`` must carry
             ``'link'``, ``'pub_date'`` and ``'detail_url'``.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     select = response.xpath
     request_meta = response.meta

     title_nodes = select(
         '//div[@class="publicationContentTitle"]//h1//text()').extract()
     item['title'] = strip_tag(title_nodes).strip()
     item['link'] = request_meta['link']
     item['source'] = (
         "Journal of clinical oncology : official journal of the American Society of Clinical Oncology"
     )
     item['pub_date'] = request_meta['pub_date']

     abstract_nodes = select(
         '//div[@class="abstractSection abstractInFull"]//text()').extract()
     item['abstract'] = strip_tag(abstract_nodes)

     detail_url = request_meta['detail_url']
     item['doi'] = detail_url[10:]
     item['authors'] = select('//div[@class="header"]//text()').extract()
     item['if_2017'] = 26.303
     item['issn'] = "1527-7755"
     item["is_pubmed"] = 0
     yield item
Beispiel #12
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a J. Allergy Clin. Immunol. article.

     Title, online date, DOI, authors and ISSN are read from ``citation_*``
     meta tags; the abstract is the text of the ``content`` div.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = strip_tag(
         response.xpath(
             '//meta[@name="citation_title"]//@content').extract())
     item['link'] = response.meta['link']
     item['source'] = "The Journal of allergy and clinical immunology"
     item['pub_date'] = tranfrom_date1(
         response.xpath(
             '//meta[@name="citation_online_date"]/@content').get())
     item['abstract'] = strip_tag(
         response.xpath('//div[@class="content"]//text()').extract())
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]//@content').extract()
     item['if_2017'] = 13.258
     # get() yields the first ISSN, or None when the tag is absent, rather
     # than raising IndexError and losing the whole item.
     item['issn'] = response.xpath(
         '//meta[@name="citation_issn"]/@content').get()
     item["is_pubmed"] = 0
     yield item
Beispiel #13
0
    def parse_info(self, response):
        """Build a ``CellItem`` for a Journal of Clinical Investigation article.

        Title, DOI, authors and ISSN are read from ``citation_*`` meta tags
        and the date from ``DC.Date``. The abstract is taken from the
        ``section-abstract`` div, falling back to paragraphs inside a div
        whose inline style contains ``margin-bottom``.

        Args:
            response: Response for the article page;
                ``response.meta['link']`` is copied into the item.

        Yields:
            The populated ``CellItem``.
        """
        item = CellItem()
        item['title'] = strip_tag(
            response.xpath(
                '//meta[@name="citation_title"]//@content').extract())
        item['link'] = response.meta['link']
        item['source'] = "The Journal of clinical investigation"
        item['pub_date'] = response.xpath(
            '//meta[@name="DC.Date"]/@content').get()
        item['abstract'] = (
            strip_tag(
                response.xpath(
                    '//div[@id="section-abstract"]//p//text()').extract())
            or strip_tag(
                response.xpath(
                    '//div[contains(@style,"margin-bottom")]//p//text()'
                ).extract())
        )

        item['doi'] = response.xpath(
            '//meta[@name="citation_doi"]/@content').get()
        item['authors'] = response.xpath(
            '//meta[@name="citation_author"]//@content').extract()
        item['if_2017'] = 13.251
        item['issn'] = response.xpath(
            '//meta[@name="citation_issn"]/@content').get()
        item["is_pubmed"] = 0
        yield item
Beispiel #14
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a Circulation article page and yield it.

     Title and authors are read from ``dc.*`` meta tags, the date from the
     ``epub-section__date`` span, the abstract from the ``hlFld-Abstract``
     div, and the DOI is derived from the ``redirectUri`` input value.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()

     # A missing title tag gives None instead of an AttributeError from
     # calling strip() on None.
     title = response.xpath(
         '//meta[@name="dc.Title"]//@content').extract_first()
     item['title'] = title.strip('\n') if title is not None else None

     item['link'] = response.meta['link']
     item['source'] = "Circulation"
     item['pub_date'] = tranfrom_date(
         response.xpath(
             '//div[@class="epub-section"]//span[@class="epub-section__date"]//text()'
         ).get())
     item['abstract'] = strip_tag(
         response.xpath(
             '//div[@class="hlFld-Abstract"]//p//text()').extract())
     item['doi'] = doi_str(
         response.xpath('//input[@name="redirectUri"]//@value').get())
     item['authors'] = response.xpath(
         '//meta[@name="dc.Creator"]//@content').extract()
     item['if_2017'] = 18.88
     # Always store a single string: the first ISSN on the page, else the
     # hard-coded default. extract() would store a list when tags exist
     # but a str on fallback.
     item['issn'] = response.xpath(
         '//meta[@name="citation_issn"]/@content').get() or "0009-7322"
     item["is_pubmed"] = 0
     yield item
Beispiel #15
0
 def parse_info(self, response):
     """Build a ``CellItem`` for a Molecular Psychiatry article page.

     The title prefers ``citation_title`` over ``dc.title``; the abstract
     prefers the ``itemprop="description"`` div over ``Abs1-content``. The
     DOI is read from the ``prism.doi`` meta tag.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = (
         strip_tag(
             response.xpath(
                 '//meta[@name="citation_title"]//@content').extract())
         or strip_tag(
             response.xpath('//meta[@name="dc.title"]//@content').extract())
     )
     item['link'] = response.meta['link']
     item['source'] = "Molecular psychiatry"
     item['pub_date'] = response.xpath(
         '//meta[@name="dc.date"]/@content').get()
     item['abstract'] = (
         strip_tag(
             response.xpath(
                 '//div[@itemprop="description"]//p//text()').extract())
         or strip_tag(
             response.xpath(
                 '//div[@id="Abs1-content"]//p//text()').extract())
     )

     prism_doi = response.xpath(
         '//meta[@name="prism.doi"]/@content').get()
     if prism_doi is None:
         item['doi'] = None
     else:
         # Keep everything after the FIRST colon (presumably a "doi:"
         # scheme prefix -- confirm), so a DOI that itself contains a
         # colon is not truncated. A value without a colon is kept whole.
         _, colon, remainder = prism_doi.partition(":")
         item['doi'] = remainder if colon else prism_doi

     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]//@content').extract()
     item['if_2017'] = 11.64
     item['issn'] = response.xpath(
         '//meta[@name="citation_issn"]/@content').get()
     item["is_pubmed"] = 0
     yield item
Beispiel #16
0
 def parse_info(self, response):
     """Build a ``CellItem`` for an Annals of Internal Medicine article.

     Title, DOI and authors are read from ``citation_*`` meta tags. The
     publication date is the third text node under the ``wi-pub-date``
     span. The abstract is looked for in three places in turn and defaults
     to an empty string.

     Args:
         response: Response for the article page; ``response.meta['link']``
             is copied into the item.

     Yields:
         The populated ``CellItem``.
     """
     item = CellItem()
     item['title'] = response.xpath(
         '//meta[@name="citation_title"]/@content').extract_first()
     item['link'] = response.meta['link']
     item['source'] = "Annals of internal medicine"

     # With fewer than three text nodes the date is stored as None instead
     # of raising IndexError and losing the whole item.
     date_nodes = response.xpath(
         '//span[@class="wi-pub-date large-view-only"]//text()').extract()
     if len(date_nodes) > 2:
         item['pub_date'] = tranfrom_date(date_nodes[2].strip())
     else:
         item['pub_date'] = None

     item['abstract'] = (
         strip_tag(
             response.xpath(
                 '//section[@class="abstract"]//p//text()').extract())
         or strip_tag(
             response.xpath('//div[@class="typed para"]//text()').extract())
         or strip_tag(
             response.xpath(
                 '//div[contains(@class,"content-section")]//div[@class="typed para"]//text()'
             ).extract())
         or ""
     )
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]/@content').extract()
     item['if_2017'] = 19.384
     # The print ISSN ("issnPrint") is 0003-4819.
     item['issn'] = "1539-3704"
     item["is_pubmed"] = 0
     yield item