# Dependencies used by this method (module-level in the original file):
import urllib.parse
from difflib import SequenceMatcher

from bs4 import BeautifulSoup
from yarl import URL

# `self.session` (an aiohttp.ClientSession), `sslcontext` (an ssl.SSLContext)
# and `self._parse_date()` are defined elsewhere in the class/module.


async def _fetch_from_pubmed(self, doi=None, pmid=None, title=None):
    """Scrape article metadata (DOI, PMID, title, authors, journal, date)
    from pubmed.ncbi.nlm.nih.gov, starting from a DOI, PMID, or title."""
    result = {'doi': None, 'pmid': None, 'title': None, 'authors': None,
              'journal': None, 'date': None}

    if pmid is None:
        # No PMID: search PubMed by DOI if we have one, otherwise by title.
        if doi is not None:
            query = doi
        elif title is not None:
            query = title
        else:
            return result
        # Wrap the query in literal quotes for an exact-phrase search and
        # percent-encode it ourselves; encoded=True keeps yarl from
        # re-encoding the string we just built.
        url = URL('https://pubmed.ncbi.nlm.nih.gov/?term='
                  + urllib.parse.quote(f'"{query}"'), encoded=True)
    else:
        # A PMID maps directly to an article page.
        url = URL('https://pubmed.ncbi.nlm.nih.gov/' + pmid)

    # Retry until PubMed answers with a 200; other status codes are logged.
    retry = True
    while retry:
        async with self.session.get(url, ssl=sslcontext) as response:
            if response.status == 200:
                retry = False
                soup = BeautifulSoup(await response.read(), 'lxml')
            else:
                print(response.status)

    d_soup = soup.find('span', {'class': 'doi'})
    if d_soup is None:
        # No DOI span means we landed on a results page rather than an
        # article page. Without a title there is nothing to match against.
        if title is None:
            return result
        # This means there was more than one search result, so we compare
        # the titles of all of the search results to our title and pick the
        # closest one that is more than 90% similar.
        d_soup = soup.find_all('a', {'class': 'labs-docsum-title'})
        max_score = 0
        max_link = ''
        for d in d_soup:
            try:
                score = SequenceMatcher(None, title, d.text.strip()).ratio()
            except AttributeError:
                return result
            if score > max_score:
                max_score = score
                max_link = d['href']
        if max_score > 0.9:
            async with self.session.get(
                    'https://pubmed.ncbi.nlm.nih.gov' + max_link,
                    ssl=sslcontext) as response:
                soup = BeautifulSoup(await response.read(), 'lxml')
            d_soup = soup.find('span', {'class': 'doi'})
        else:
            return result

    try:
        result['doi'] = d_soup.find('a').text.strip()
    except AttributeError:
        return result
    if doi is not None:
        # Sanity check: the article we landed on must match the DOI we asked for.
        assert doi.lower() == result['doi'].lower()

    result['pmid'] = soup.find('strong', {'class': 'current-id'}).text.strip()
    if pmid is not None:
        assert pmid == result['pmid']

    # The citation line looks like "2020 Mar 15;383(12):1234-45."; the part
    # before the semicolon is the publication date.
    cite = soup.find('span', {'class': 'cit'}).text
    date = cite[:cite.find(';')]
    result['date'] = self._parse_date(date).isoformat()

    a_list = soup.find('div', {'class': 'authors-list'})
    if a_list is not None:
        authors = a_list.find_all('a', {'class': 'full-name'})
    else:
        authors = []
    result['authors'] = [a.text.strip() for a in authors]

    result['title'] = soup.find('h1', {'class': 'heading-title'}).text.strip()
    result['journal'] = soup.find(
        'button', {'id': 'full-view-journal-trigger'})['title'].strip()
    return result
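

# --- Usage sketch (illustrative, not part of the original module) ----------
# The method above is assumed to live on a class that owns an
# aiohttp.ClientSession as `self.session` and a `_parse_date()` helper.
# The class name `PubMedFetcher`, the dateutil-based `_parse_date`, the
# `sslcontext` construction and the `main()` wrapper below are assumptions
# made for this sketch, not the author's actual API.

import asyncio
import ssl

import aiohttp
from dateutil import parser as date_parser

sslcontext = ssl.create_default_context()


class PubMedFetcher:
    def __init__(self, session):
        self.session = session

    def _parse_date(self, text):
        # PubMed citation dates look like "2020 Mar 15" (day or month may be
        # missing); dateutil tolerates the partial forms. Assumed helper.
        return date_parser.parse(text).date()


# Bind the scraper defined above as a method of the sketch class.
PubMedFetcher._fetch_from_pubmed = _fetch_from_pubmed


async def main():
    async with aiohttp.ClientSession() as session:
        fetcher = PubMedFetcher(session)
        # Look an article up by title; a DOI or PMID would work the same way.
        meta = await fetcher._fetch_from_pubmed(
            title='A hypothetical article title')
        print(meta)


if __name__ == '__main__':
    asyncio.run(main())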