def overview(self):
    """Scrape per-organ award numbers from the overview page.

    The HTML is read from ``self.path`` when that path exists and is
    otherwise fetched from ``self.url``. Every ``div`` whose ``id`` ends
    in ``-bubble`` is treated as one organ section, keyed by the part of
    the id before the first ``-``.

    Populates, in this order:

    * ``self.raw`` -- organ key -> list of award identifiers as found.
    * ``self.former_to_current`` -- former award -> current award, with
      keys and values each passed through ``nml.NormAward`` twice.
    * ``self._normalized`` -- ``self.raw`` with organ keys replaced via
      ``self.organ_lookup`` where present and awards passed through
      ``nml.NormAward``.
    * ``self.normalized`` -- the same transformation applied once more
      to ``self._normalized``.
    """

    def is_bubble_id(value):
        # Mirrors ``v and v.endswith('-bubble')``: elements without an
        # id yield a falsy value and are therefore not matched.
        return value and value.endswith('-bubble')

    # Prefer the local copy of the page; only hit the network without it.
    if self.path.exists():
        with open(self.path, 'rb') as handle:
            page = self._BeautifulSoup(handle.read(), 'lxml')
    else:
        response = requests.get(self.url)
        page = self._BeautifulSoup(response.content, 'lxml')

    self.raw = {}
    self.former_to_current = {}
    for section in page.find_all('div', {'id': is_bubble_id}):
        organ, *id_remainder = section['id'].split('-')
        logd.debug(id_remainder)

        collected = []
        self.raw[organ] = collected

        for anchor in section.find_all('a'):
            href = anchor['href']
            log.debug(href)
            query = parse_qs(urlparse(href).query)

            if 'projectnumber' in query:
                collected.extend(query['projectnumber'])
                continue

            if 'aid' in query:
                # self.reporter is handed the whole href and returns an
                # (award, former) pair; former may be None.
                award, former = self.reporter(href)
                collected.append(award)
                if former is not None:
                    # for this usecase it is ok to list the former
                    # award alongside the current one
                    collected.append(former)
                    self.former_to_current[former] = award
                continue

            if query:
                # Query string present but with no recognised key.
                log.debug(lj(query))

    renormalized = {}
    for former, current in self.former_to_current.items():
        former_norm = nml.NormAward(nml.NormAward(former))
        current_norm = nml.NormAward(nml.NormAward(current))
        renormalized[former_norm] = current_norm
    self.former_to_current = renormalized

    self._normalized = {}
    self.normalized = {}
    # Two passes: raw -> _normalized, then _normalized -> normalized.
    passes = (
        (self.raw, self._normalized),
        (self._normalized, self.normalized),
    )
    for source, target in passes:
        for organ, awards in source.items():
            key = organ
            if key in self.organ_lookup:
                key = self.organ_lookup[key].iri

            target[key] = [nml.NormAward(award) for award in awards]
def award_number(raw_award_number, funding) -> str:
    """Return ``raw_award_number`` passed through ``nml.NormAward`` twice.

    ``funding`` is accepted as part of the signature but is not used.
    """
    first_pass = nml.NormAward(raw_award_number)
    second_pass = nml.NormAward(first_pass)
    return second_pass