def parse_info(self, response):
    """Build a ``CellItem`` from an article page and yield it.

    Bibliographic fields are read from the page's ``citation_*`` meta tags.
    ``if_2017`` and ``source`` are looked up from the ISSN; when the ISSN is
    missing or not in the table, both fields are left unset.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    # ISSN -> (if_2017 value, source name).
    journals = {
        "0092-8674": (31.398, "Cell"),
        "1931-3128": (17.872, "Cell host & microbe"),
        "1097-2765": (14.248, "Molecular cell"),
        "1074-7613": (19.734, "Immunity"),
        "1535-6108": (22.844, "Cancer cell"),
        "1550-4131": (20.565, "Cell metabolism"),
        "1471-4906": (14.188, "Trends in immunology"),
        "1471-4914": (11.021, "Trends in molecular medicine"),
        "0896-6273": (14.318, "Neuron"),
        "1934-5909": (23.29, "Cell stem cell"),
    }
    xpath = response.xpath

    item = CellItem()
    item['title'] = strip_tag(
        xpath('//meta[@name="citation_title"]/@content').extract_first())
    item['link'] = response.meta['link']

    # .get() yields None when the tag is absent, so the "" default really
    # applies; indexing extract()[0] raised IndexError before `or` could run.
    issn = xpath('//meta[@name="citation_issn"]/@content').get() or ""
    item['issn'] = issn
    if issn in journals:
        item['if_2017'], item['source'] = journals[issn]

    item['pub_date'] = tranfrom_date(
        xpath('//meta[contains(@name, "date")]/@content').get())

    # Prefer the abstract paragraphs in the article body, then the meta tag,
    # then an empty string.
    item['abstract'] = (
        handler_abstract(
            xpath('//div[@id="article"]//div[@class="content"]//p/text()').extract())
        or strip_tag(xpath('//meta[@name="citation_abstract"]/@content').get())
        or ""
    )
    item['doi'] = xpath('//meta[@name="citation_doi"]/@content').get()
    item['authors'] = xpath('//meta[@name="citation_author"]/@content').extract()
    item["is_pubmed"] = 0
    # Author affiliations (citation_author_institution) are not collected.
    yield item
def parse_info(self, response):
    """Build a ``CellItem`` for an eLife article page and yield it.

    Title, date, DOI and authors come from the page's ``dc.*`` meta tags; the
    abstract comes from the ``<section id="abstract">`` paragraphs.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = xpath('//meta[@name="dc.title"]/@content').extract_first()
    item['link'] = response.meta['link']
    item['source'] = "eLife"
    item['pub_date'] = xpath('//meta[@name="dc.date"]/@content').get()
    item['abstract'] = strip_tag(
        xpath('//section[@id="abstract"]//p//text()').extract())

    # The first four characters of the identifier are dropped (presumably a
    # "doi:" prefix -- confirm against a live page). A missing tag now leaves
    # the DOI as None instead of raising TypeError on None[4:].
    identifier = xpath('//meta[@name="dc.identifier"]/@content').get()
    item['doi'] = identifier[4:] if identifier else identifier

    item['authors'] = xpath('//meta[@name="dc.contributor"]/@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 7.616
    # Use the text found under <hypothesis-highlight>, else the fixed ISSN.
    item['issn'] = xpath('//hypothesis-highlight//text()').get() or '2050-084X'
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Title, date, DOI and authors are taken from the ``dc.*`` meta tags. The
    abstract is taken from the ``hlFld-Abstract`` block, falling back to the
    ``abstract`` block. Source name, ``if_2017`` and ISSN are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="dc.Title"]//@content').get()
    record['link'] = response.meta['link']
    record['source'] = 'American journal of respiratory and critical care medicine'
    record['pub_date'] = select('//meta[@name="dc.Date"]/@content').get()

    # Primary abstract container first; only query the fallback when the
    # primary one produced nothing usable.
    summary = strip_tag(
        select('//div[@class="hlFld-Abstract"]//p//text() ').getall())
    if not summary:
        summary = strip_tag(
            select('//div[@class="abstract"]//p//text()').getall())
    record['abstract'] = summary

    record['doi'] = select('//meta[@name="dc.Identifier"]/@content').get()
    record['authors'] = select('//meta[@name="dc.Creator"]//@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 15.239
    record['issn'] = '1535-4970'
    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Build a ``CellItem`` for a Gastroenterology article page and yield it.

    All fields except the fixed source name and ``if_2017`` value are read
    from the page's ``citation_*`` meta tags. The abstract falls back to the
    text of ``<div class="content">`` when the meta tag yields nothing.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = xpath('//meta[@name="citation_title"]/@content').extract_first()
    item['link'] = response.meta['link']
    item['source'] = "Gastroenterology"

    # Prefer the online publication date; fall back to the citation date.
    item['pub_date'] = (
        tranfrom_date1(xpath('//meta[@name="citation_online_date"]/@content').get())
        or tranfrom_date1(xpath('//meta[@name="citation_date"]/@content').get())
    )
    item['abstract'] = (
        strip_tag(xpath('//meta[@name="citation_abstract"]/@content').extract())
        or strip_tag(xpath('//div[@class="content"]//text()').extract())
    )
    item['doi'] = xpath('//meta[@name="citation_doi"]/@content').get()
    item['authors'] = xpath('//meta[@name="citation_author"]/@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 20.773
    # .get() returns the first match or None; extract()[0] raised IndexError
    # and discarded the whole item when the tag was absent.
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get()
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Title, DOI and authors come from ``citation_*`` meta tags; the date comes
    from the ``epub-date`` span; the abstract comes from the
    ``article-section__content`` blocks. Source name, ``if_2017`` and ISSN
    are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="citation_title"]/@content').get()
    record['link'] = response.meta['link']
    record['source'] = "CA: a cancer journal for clinicians"

    raw_date = select('//div//span[@class="epub-date"]//text()').get()
    record['pub_date'] = tranfrom_date(raw_date)

    # Try the direct-child paragraphs first, then the "en main" section.
    summary = strip_tag(
        select('//div[@class="article-section__content"]/p/text()').getall())
    if not summary:
        summary = strip_tag(
            select('//section//div[@class="article-section__content en main"]//p//text()').getall())
    record['abstract'] = summary

    record['doi'] = select('//meta[@name="citation_doi"]/@content').get()
    record['authors'] = select('//meta[@name="citation_author"]/@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    # Print ISSN noted in the original source: 0007-9235.
    record['if_2017'] = 244.585
    record['issn'] = "1542-4863"
    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Title, date, DOI and authors come from ``dc.*`` meta tags; the abstract
    comes from the ``abstractSection abstractInFull`` block. Source name,
    ``if_2017`` and ISSN are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.

    Raises:
        IndexError: If the page has fewer than two ``dc.Identifier`` meta
            tags (the DOI is read from the second one).
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="dc.Title"]//@content').get()
    record['link'] = response.meta['link']
    record['source'] = 'The American journal of psychiatry'
    record['pub_date'] = select('//meta[@name="dc.Date"]/@content').get()

    abstract_nodes = select(
        '//div[@class="abstractSection abstractInFull"]//p//text()').getall()
    record['abstract'] = strip_tag(abstract_nodes)

    # The second dc.Identifier entry is the one used as the DOI.
    identifiers = select('//meta[@name="dc.Identifier"]/@content').getall()
    record['doi'] = identifiers[1]

    record['authors'] = select('//meta[@name="dc.Creator"]//@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 13.391
    record['issn'] = '1535-7228'
    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Fields are read from the page's meta tags (``citation_*``,
    ``article:published_time``, ``DC.Description``). Source name and
    ``if_2017`` are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.

    Raises:
        IndexError: If the page has fewer than two ``citation_issn`` meta
            tags (the ISSN is read from the second one).
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="citation_title"]//@content').get()
    record['link'] = response.meta['link']
    record['source'] = "The European respiratory journal"
    record['pub_date'] = select(
        '//meta[@name="article:published_time"]/@content').get()

    description = select('//meta[@name="DC.Description"]/@content').getall()
    record['abstract'] = strip_tag(description)

    record['doi'] = select('//meta[@name="citation_doi"]/@content').get()
    record['authors'] = select('//meta[@name="citation_author"]//@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 12.242

    # The second citation_issn entry is the one stored.
    issn_values = select('//meta[@name="citation_issn"]/@content').getall()
    record['issn'] = issn_values[1]

    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Build a ``CellItem`` for a Blood article page and yield it.

    Fields are read from the page's ``citation_*`` and ``DC.Date`` meta tags.
    Source name and ``if_2017`` are fixed values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = xpath('//meta[@name="citation_title"]//@content').extract_first()
    item['link'] = response.meta['link']
    item['source'] = "Blood"
    item['pub_date'] = xpath('//meta[@name="DC.Date"]/@content').get()
    # The query previously began with '///', which is not a valid XPath 1.0
    # location path; '//' is the intended descendant search.
    item['abstract'] = strip_tag(
        xpath('//meta[@name="citation_abstract"]/@content').extract())
    item['doi'] = xpath('//meta[@name="citation_doi"]/@content').get()
    item['authors'] = xpath('//meta[@name="citation_author"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 15.132
    # .get() returns the first match or None; extract()[0] raised IndexError
    # and discarded the whole item when the tag was absent.
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get()
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Build a ``CellItem`` for a JACC article page and yield it.

    Fields are read from the page's ``citation_*`` and ``DC.*`` meta tags.
    Source name and ``if_2017`` are fixed values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.

    Raises:
        IndexError: If the page has fewer than two ``citation_issn`` meta
            tags (the ISSN is read from the second one).
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = xpath('//meta[@name="citation_title"]//@content').extract_first()
    item['link'] = response.meta['link']
    item['source'] = "Journal of the American College of Cardiology"
    # The query previously began with '///', which is not a valid XPath 1.0
    # location path; '//' is the intended descendant search.
    item['pub_date'] = xpath('//meta[@name="DC.Date"]/@content').get()
    item['abstract'] = strip_tag(
        xpath('//meta[@name="DC.Description"]/@content').extract())
    item['doi'] = xpath('//meta[@name="DC.Identifier"]/@content').get()
    item['authors'] = xpath('//meta[@name="DC.Contributor"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 16.834
    # The second citation_issn entry is the one stored.
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').extract()[1]
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Fields are read from the page's meta tags (``citation_*``, ``DC.*``,
    ``og:description``). Source name and ``if_2017`` are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.

    Raises:
        IndexError: If the page has fewer than two ``citation_issn`` meta
            tags (the ISSN is read from the second one).
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="citation_title"]//@content').get()
    record['link'] = response.meta['link']
    record['source'] = "The Journal of experimental medicine"
    record['pub_date'] = select('//meta[@name="DC.Date"]/@content').get()

    description = select('//meta[@name="og:description"]/@content').getall()
    record['abstract'] = strip_tag(description)

    record['doi'] = select('//meta[@name="DC.Identifier"]/@content').get()
    record['authors'] = select('//meta[@name="citation_author"]//@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 10.79

    # The second citation_issn entry is the one stored.
    issn_values = select('//meta[@name="citation_issn"]/@content').getall()
    record['issn'] = issn_values[1]

    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Title, abstract and authors are taken from the page body. The link,
    publication date and DOI are taken from ``response.meta``, which the
    requesting callback must have filled in. Source name, ``if_2017`` and
    ISSN are fixed values.

    Args:
        response: Page response carrying ``meta['link']``,
            ``meta['pub_date']`` and ``meta['detail_url']``.

    Yields:
        The populated ``CellItem``.
    """
    select = response.xpath
    request_meta = response.meta
    record = CellItem()

    heading_nodes = select(
        '//div[@class="publicationContentTitle"]//h1//text()').getall()
    record['title'] = strip_tag(heading_nodes).strip()
    record['link'] = request_meta['link']
    record['source'] = "Journal of clinical oncology : official journal of the American Society of Clinical Oncology"
    record['pub_date'] = request_meta['pub_date']

    abstract_nodes = select(
        '//div[@class="abstractSection abstractInFull"]//text()').getall()
    record['abstract'] = strip_tag(abstract_nodes)

    # The DOI is the detail URL with its first ten characters removed.
    detail_url = request_meta['detail_url']
    record['doi'] = detail_url[10:]

    record['authors'] = select('//div[@class="header"]//text()').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 26.303
    record['issn'] = "1527-7755"
    record["is_pubmed"] = 0
    yield record
def parse_info(self, response):
    """Build a ``CellItem`` for a JACI article page and yield it.

    Title, date, DOI, authors and ISSN are read from ``citation_*`` meta
    tags; the abstract is the text of ``<div class="content">``. Source name
    and ``if_2017`` are fixed values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = strip_tag(
        xpath('//meta[@name="citation_title"]//@content').extract())
    item['link'] = response.meta['link']
    item['source'] = "The Journal of allergy and clinical immunology"
    item['pub_date'] = tranfrom_date1(
        xpath('//meta[@name="citation_online_date"]/@content').get())
    item['abstract'] = strip_tag(
        xpath('//div[@class="content"]//text()').extract())
    item['doi'] = xpath('//meta[@name="citation_doi"]/@content').get()
    item['authors'] = xpath('//meta[@name="citation_author"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 13.258
    # .get() returns the first match or None; extract()[0] raised IndexError
    # and discarded the whole item when the tag was absent.
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get()
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Build a ``CellItem`` for a JCI article page and yield it.

    Title, date, DOI, authors and ISSN are read from meta tags. The abstract
    is read from ``<div id="section-abstract">``, falling back to paragraphs
    inside any ``div`` whose ``style`` contains ``margin-bottom``. Source
    name and ``if_2017`` are fixed values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = strip_tag(
        xpath('//meta[@name="citation_title"]//@content').extract())
    item['link'] = response.meta['link']
    item['source'] = "The Journal of clinical investigation"
    # The query previously began with '///', which is not a valid XPath 1.0
    # location path; '//' is the intended descendant search.
    item['pub_date'] = xpath('//meta[@name="DC.Date"]/@content').get()
    item['abstract'] = (
        strip_tag(xpath('//div[@id="section-abstract"]//p//text()').extract())
        or strip_tag(
            xpath('//div[contains(@style,"margin-bottom")]//p//text()').extract())
    )
    item['doi'] = xpath('//meta[@name="citation_doi"]/@content').get()
    item['authors'] = xpath('//meta[@name="citation_author"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 13.251
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get()
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Build a ``CellItem`` for a Circulation article page and yield it.

    Title and authors come from ``dc.*`` meta tags, the date from the
    ``epub-section__date`` span, the abstract from the ``hlFld-Abstract``
    block, and the DOI from the ``redirectUri`` form input via ``doi_str``.
    Source name and ``if_2017`` are fixed values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    # Strip surrounding newlines, but tolerate a missing title tag instead of
    # raising AttributeError on None.
    title = xpath('//meta[@name="dc.Title"]//@content').extract_first()
    item['title'] = title.strip('\n') if title else title
    item['link'] = response.meta['link']
    item['source'] = "Circulation"
    item['pub_date'] = tranfrom_date(
        xpath('//div[@class="epub-section"]//span[@class="epub-section__date"]//text()').get())
    item['abstract'] = strip_tag(
        xpath('//div[@class="hlFld-Abstract"]//p//text()').extract())
    item['doi'] = doi_str(xpath('//input[@name="redirectUri"]//@value').get())
    item['authors'] = xpath('//meta[@name="dc.Creator"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 18.88
    # Always store a string: extract() produced a list when the tag existed
    # but the fallback was a plain string, so the field's type varied by page.
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get() or "0009-7322"
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Build a ``CellItem`` for a Molecular Psychiatry page and yield it.

    Title, date, DOI, authors and ISSN come from meta tags; the abstract
    comes from the ``itemprop="description"`` block, falling back to
    ``<div id="Abs1-content">``. Source name and ``if_2017`` are fixed
    values.

    Args:
        response: Page response. ``response.meta['link']`` must hold the
            value stored in the item's ``link`` field.

    Yields:
        The populated ``CellItem``.
    """
    xpath = response.xpath

    item = CellItem()
    item['title'] = (
        strip_tag(xpath('//meta[@name="citation_title"]//@content').extract())
        or strip_tag(xpath('//meta[@name="dc.title"]//@content').extract())
    )
    item['link'] = response.meta['link']
    item['source'] = "Molecular psychiatry"
    # The query previously began with '///', which is not a valid XPath 1.0
    # location path; '//' is the intended descendant search.
    item['pub_date'] = xpath('//meta[@name="dc.date"]/@content').get()
    item['abstract'] = (
        strip_tag(xpath('//div[@itemprop="description"]//p//text()').extract())
        or strip_tag(xpath('//div[@id="Abs1-content"]//p//text()').extract())
    )

    # Keep everything after the first ':' of the prism.doi value. Splitting
    # only once preserves DOIs that themselves contain ':'; a missing tag or
    # a value without ':' is stored as-is instead of raising.
    raw_doi = xpath('//meta[@name="prism.doi"]/@content').get()
    if raw_doi and ":" in raw_doi:
        item['doi'] = raw_doi.split(":", 1)[1]
    else:
        item['doi'] = raw_doi

    item['authors'] = xpath('//meta[@name="citation_author"]//@content').extract()
    # Author affiliations (citation_author_institution) are not collected.
    item['if_2017'] = 11.64
    item['issn'] = xpath('//meta[@name="citation_issn"]/@content').get()
    item["is_pubmed"] = 0
    yield item
def parse_info(self, response):
    """Yield a ``CellItem`` scraped from an article page.

    Title, DOI and authors come from ``citation_*`` meta tags; the date comes
    from the ``wi-pub-date`` span; the abstract comes from the first of three
    page locations that yields text, else an empty string. Source name,
    ``if_2017`` and ISSN are fixed values.

    Args:
        response: Page response carrying ``meta['link']``.

    Yields:
        The populated ``CellItem``.

    Raises:
        IndexError: If the ``wi-pub-date`` span yields fewer than three text
            nodes (the date is read from the third one).
    """
    select = response.xpath
    record = CellItem()

    record['title'] = select('//meta[@name="citation_title"]/@content').get()
    record['link'] = response.meta['link']
    record['source'] = "Annals of internal medicine"

    # The third text node of the date span holds the date string.
    date_nodes = select(
        '//span[@class="wi-pub-date large-view-only"]//text()').getall()
    record['pub_date'] = tranfrom_date(date_nodes[2].strip())

    # Candidate abstract locations, tried in order until one gives text.
    abstract_queries = (
        '//section[@class="abstract"]//p//text()',
        '//div[@class="typed para"]//text()',
        '//div[contains(@class,"content-section")]//div[@class="typed para"]//text()',
    )
    summary = ""
    for query in abstract_queries:
        candidate = strip_tag(select(query).getall())
        if candidate:
            summary = candidate
            break
    record['abstract'] = summary

    record['doi'] = select('//meta[@name="citation_doi"]/@content').get()
    record['authors'] = select('//meta[@name="citation_author"]/@content').getall()
    # Author affiliations (citation_author_institution) are not collected.
    record['if_2017'] = 19.384
    # Print ISSN noted in the original source: 0003-4819.
    record['issn'] = "1539-3704"
    record["is_pubmed"] = 0
    yield record