Example #1
0
    def overview(self):
        """Scrape the overview page and build organ -> award lookups.

        The page is read from ``self.path`` when that file exists, otherwise
        it is fetched from ``self.url``.  Every ``<div>`` whose ``id`` ends
        in ``-bubble`` is treated as one group; the text before the first
        ``-`` in that id is used as the organ key.

        Populates the following attributes (nothing is returned):

        * ``self.raw`` -- organ key -> list of award values collected from
          the links inside the group.
        * ``self.former_to_current`` -- former award -> current award, with
          both sides passed through ``nml.NormAward`` twice.
        * ``self._normalized`` -- ``self.raw`` after one ``nml.NormAward``
          pass, with the key replaced by ``self.organ_lookup[organ].iri``
          when the organ is present in ``self.organ_lookup``.
        * ``self.normalized`` -- ``self._normalized`` after a second pass.
        """
        if self.path.exists():
            with open(self.path, 'rb') as f:
                soup = self._BeautifulSoup(f.read(), 'lxml')
        else:
            resp = requests.get(self.url)
            soup = self._BeautifulSoup(resp.content, 'lxml')

        self.raw = {}
        self.former_to_current = {}
        for bsoup in soup.find_all(
                'div', {'id': lambda v: v and v.endswith('-bubble')}):
            organ, *_rest = bsoup['id'].split('-')
            logd.debug(_rest)
            award_list = self.raw[organ] = []
            # href=True restricts the search to anchors that actually carry
            # an href attribute; an anchor without one would otherwise raise
            # KeyError on the lookup below and abort the whole scrape.
            for asoup in bsoup.find_all('a', href=True):
                href = asoup['href']
                log.debug(href)
                parts = urlparse(href)
                query = parse_qs(parts.query)
                if 'projectnumber' in query:
                    # parse_qs yields a list per key, hence extend.
                    award_list.extend(query['projectnumber'])
                elif 'aid' in query:
                    award, former = self.reporter(href)
                    award_list.append(award)
                    if former is not None:
                        # The former award is listed next to the current one
                        # on purpose: for this usecase this is ok.
                        award_list.append(former)
                        self.former_to_current[former] = award
                elif query:
                    # Link has a query string but none of the known keys.
                    log.debug(lj(query))

        # NOTE(review): NormAward is applied twice everywhere in this method
        # -- presumably a single pass is not sufficient; confirm before
        # collapsing to one call.
        self.former_to_current = {
            nml.NormAward(nml.NormAward(k)): nml.NormAward(nml.NormAward(v))
            for k, v in self.former_to_current.items()
        }
        self._normalized = {}
        self.normalized = {}
        # Two chained passes: raw -> _normalized -> normalized.
        for frm, to in ((self.raw, self._normalized), (self._normalized,
                                                       self.normalized)):
            for organ, awards in frm.items():
                if organ in self.organ_lookup:
                    organ = self.organ_lookup[organ].iri

                to[organ] = [nml.NormAward(a) for a in awards]
Example #2
0
 def award_number(raw_award_number, funding) -> str:
     """Return ``raw_award_number`` passed through ``nml.NormAward`` twice.

     ``funding`` is accepted but not used by this function.
     """
     first_pass = nml.NormAward(raw_award_number)
     second_pass = nml.NormAward(first_pass)
     return second_pass