Example #1
    def run_reasoner(self):
        graph = self._mis_graph()
        expanded_graph = self._mis_graph()
        for t in self.triples():
            graph.add(t)
            expanded_graph.add(t)

        closure = rdfc.OWLRL_Semantics
        rdfc.DeductiveClosure(closure).expand(expanded_graph)
        with open(auth.get_path('cache-path') / 'reasoned-curation-export.ttl', 'wb') as f:
            f.write(expanded_graph.serialize(format='nifttl'))
Example #2
    def __init__(self, *args, **kwargs):
        self._cache_path = auth.get_path('cache-path') / 'google_sheets'
        if not self._only_cache:
            try:
                if 'readonly' not in kwargs or kwargs['readonly']:
                    # readonly=True is default so we take this branch if not set
                    self._saf = auth.get_path(
                        'google-api-service-account-file-readonly')
                else:
                    self._saf = auth.get_path(
                        'google-api-service-account-file-rw')
            except KeyError as e:
                log.warning(e)
            except Exception as e:
                log.exception(e)

        try:
            super().__init__(*args, **kwargs)
        finally:
            self._saf = None
Example #3
def latest_ir(org_id=None):
    if org_id is None:
        org_id = auth.get('blackfynn-organization')

    export = Export(auth.get_path('export-path'),
                    None,
                    None,
                    None,
                    latest=True,
                    org_id=org_id)

    return export.latest_ir
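
A hedged usage sketch for the helper above; the keyword form mirrors the signature shown, and the organization id is elided rather than invented:

# minimal usage sketch (assumes a previous export exists under export-path)
blob = latest_ir()                               # organization id read from the secrets config
# blob = latest_ir(org_id='N:organization:...')  # or pass one explicitly (id elided)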
Example #4
    @classmethod
    def setup(cls, creds_file=None):
        if creds_file is None:
            try:
                creds_file = auth.get_path('protocols-io-api-creds-file')
            except KeyError as e:
                raise TypeError('creds_file is a required argument'
                                ' unless you have it in secrets') from e
        _pio_creds = get_protocols_io_auth(creds_file)
        cls._pio_header = QuietDict({'Authorization': 'Bearer ' + _pio_creds.token})
        _inst = cls()
        for wants in cls._instance_wanted_by:
            wants._protocol_data = _inst
Example #5
    def _file_type_status_lookup(cls):
        import json  # FIXME
        if not hasattr(cls, '_sigh_ftslu'):
            resources = auth.get_path('resources')
            with open(resources / 'mimetypes.json', 'rt') as f:
                classification = json.load(f)

            mimetypes = {
                mimetype: status
                for status, objs in classification.items() for obj in objs
                for mimetype in (
                    obj['mimetype'] if is_list_or_tuple(obj['mimetype']) else (
                        obj['mimetype'], ))
            }
            suffixes = {
                obj['suffix']: status
                for status, objs in classification.items() for obj in objs
            }
            cls._mimetypes_lu, cls._suffixes_lu = mimetypes, suffixes
            cls._sigh_ftslu = True

        return cls._mimetypes_lu, cls._suffixes_lu
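
For reference, the two comprehensions above imply a classification file of roughly the following shape; the status keys and entries here are illustrative assumptions, not the contents of the real resources/mimetypes.json:

# illustrative shape only; 'mimetype' may be a single string or a list of strings
example_classification = {
    'supported': [
        {'mimetype': 'text/csv', 'suffix': '.csv'},
        {'mimetype': ['image/tiff', 'image/tif'], 'suffix': '.tiff'},
    ],
    'unsupported': [
        {'mimetype': 'application/octet-stream', 'suffix': '.bin'},
    ],
}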
Example #6
class RorInst(URIInstrumentation, RorId):
    @property
    def data(self):
        return self._data(self.suffix)

    @cache(Path(auth.get_path('cache-path'), 'ror_json'), create=True)
    def _data(self, suffix):
        # TODO data endpoint prefix ?? vs data endpoint pattern ...
        resp = requests.get(RorId(prefix='ror.api', suffix=suffix))
        if resp.ok:
            return resp.json()

    @property
    def name(self):
        return self.data['name']

    label = name  # map their schema to ours

    def asExternalId(self, id_class):
        eids = self.data['external_ids']
        if id_class._ror_key in eids:
            eid_record = eids[id_class._ror_key]
            if eid_record['preferred']:
                eid = eid_record['preferred']
            else:
                eid_all = eid_record['all']
                # https://github.com/ror-community/ror-api/issues/53
                if isinstance(eid_all, str):
                    eid = eid_all
                else:
                    eid = eid_all[0]

            return id_class(eid)

    _type_map = {
        'Education': TEMP.Institution,
        'Healthcare': TEMP.Institution,
        'Facility': TEMP.CoreFacility,
        'Nonprofit': TEMP.Nonprofit,
        'Other': TEMP.Institution,
    }

    @property
    def institutionTypes(self):
        if 'types' in self.data:
            for t in self.data['types']:
                if t == 'Other':
                    log.info(self.label)

                yield self._type_map[t]

        else:
            log.critical(self.data)
            raise TypeError('wat')

    @property
    def synonyms(self):
        d = self.data
        # FIXME how to deal with type conversion in a saner way ...
        yield from [rdflib.Literal(s) for s in d['aliases']]
        yield from [rdflib.Literal(s) for s in d['acronyms']]
        yield from [
            rdflib.Literal(l['label'], lang=l['iso639']) for l in d['labels']
        ]

    @property
    def triples_gen(self):
        """ produce a triplified version of the record """
        s = self.u
        a = rdf.type
        yield s, a, owl.NamedIndividual
        for o in self.institutionTypes:
            yield s, a, o

        yield s, rdfs.label, rdflib.Literal(self.label)
        for o in self.synonyms:
            yield s, NIFRID.synonym, o  # FIXME this loses information about synonym type
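
A hedged usage sketch; the constructor call and the ROR identifier are assumptions made for illustration, only the data, synonyms, and triples_gen accessors come from the class above:

inst = RorInst('ROR:05gq02987')   # hypothetical identifier and construction style
g = rdflib.Graph()
for t in inst.triples_gen:        # network access happens lazily via the cached _data call
    g.add(t)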
Example #7
    raise exc.NotInProjectError(f'{project_path}')

defaults = {
    o.name: o.value if o.argcount else None
    for o in parse_defaults(clidoc)
}
args = {
    'server': True,
    '--raw': False,
    '--latest': True,
    '--sort-count-desc': True,
    '--project-path': project_path,
    '--tab-table': False,
    '<path>': [],
    '--verbose': False,
    '--export-path': auth.get_path('export-path'),
    '--partial': False,
    '--open': False,
}

options = Options(args, defaults)
report = Report(options)

# set report paths that would normally be populated from Main
report.cwd = options.project_path
report.project_path = options.project_path
report.project_id = project_path.cache.id  # FIXME should not have to do this manually?
report.anchor = project_path.cache
report.summary = Summary(options.project_path)
report._timestamp = None  # FIXME
report._folder_timestamp = None  # FIXME
Example #8
class ProtocolData(dat.HasErrors):
    # this class is best used as a helper class not as a __call__ class

    _instance_wanted_by = PioInst, PioUserInst

    def __init__(self, id=None):  # FIXME lots of ways to use this class ...
        self.id = id  # still needed for the converters use case :/
        # FIXME protocol data shouldn't need do know anything about
        # what dataset is using it, >_<
        super().__init__(pipeline_stage=self.__class__)

    def protocol(self, uri):
        return self._get_protocol_json(uri)

    __call__ = protocol

    @classmethod
    def setup(cls, creds_file=None):
        if creds_file is None:
            try:
                creds_file = auth.get_path('protocols-io-api-creds-file')
            except KeyError as e:
                raise TypeError('creds_file is a required argument'
                                ' unless you have it in secrets') from e
        _pio_creds = get_protocols_io_auth(creds_file)
        cls._pio_header = QuietDict(
            {'Authorization': 'Bearer ' + _pio_creds.access_token})
        _inst = cls()
        for wants in cls._instance_wanted_by:
            wants._protocol_data = _inst

    @classmethod
    def cache_path(cls):
        return config.protocol_cache_path

    @property
    def protocol_uris_resolved(self):
        if not hasattr(self, '_c_protocol_uris_resolved'):
            self._c_protocol_uris_resolved = list(self._protocol_uris_resolved)

        return self._c_protocol_uris_resolved

    @property
    def _protocol_uris_resolved(self):
        # FIXME quite slow ...
        for start_uri in self.protocol_uris:
            log.debug(start_uri)
            # walk the redirect chain; the for/else only yields the final resolved uri
            for end_uri in resolution_chain(start_uri):
                pass
            else:
                yield end_uri

    @property
    def protocol_annotations(self):
        for uri in self.protocol_uris_resolved:
            yield from protc.byIri(uri, prefix=True)

    @property
    def protocol_jsons(self):
        for uri in self.protocol_uris_resolved:
            yield self._get_protocol_json(uri)

    @cache(auth.get_path('cache-path') / 'protocol_json', create=True)
    def get(self, uri):
        #juri = uri + '.json'
        logd.info(uri)
        log.debug('going to network for protocols')
        resp = requests.get(uri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')

    @cache(auth.get_path('cache-path') / 'protocol_json', create=True)
    def _get_protocol_json(self, uri):
        #juri = uri + '.json'
        logd.info(uri)
        pi = get_right_id(uri)
        if 'protocols.io' in pi:
            pioid = pi.slug  # FIXME normalize before we ever get here ...
            log.info(pioid)
        else:
            msg = f'protocol uri is not from protocols.io {pi} {self.id}'
            logd.error(msg)
            self.addError(msg)
            return

        #uri_path = uri.rsplit('/', 1)[-1]
        apiuri = 'https://www.protocols.io/api/v3/protocols/' + pioid
        #'https://www.protocols.io/api/v3/groups/sparc/protocols'
        #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
        #print(apiuri, header)
        log.debug('going to network for protocols')
        resp = requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')
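
A hedged usage sketch built only from the methods shown above; the dataset id is a placeholder and the protocol uri is elided rather than invented:

ProtocolData.setup()                      # resolves the creds file via auth.get_path when not passed
pd = ProtocolData(id='dataset-id-here')   # placeholder id, only used in error messages here
blob = pd('https://www.protocols.io/view/...')  # __call__ -> protocol -> _get_protocol_json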
Example #9
class OrganData:
    """ retrieve SPARC investigator data """

    url = ('https://commonfund.nih.gov/sites/default/'
           'files/sparc_nervous_system_graphic/main.html')

    def organ(self, award_number):
        if award_number in self.manual and award_number not in self.sourced:
            log.warning(f'used manual organ mapping for {award_number}')
        try:
            return self.award_to_organ[award_number]
        except KeyError as e:
            logd.error(f'bad award_number {award_number}')

    __call__ = organ

    organ_lookup = {
        'bladder': OntId('FMA:15900'),
        'brain': OntId('UBERON:0000955'),
        #'computer': OntId(''),
        'heart': OntId('FMA:7088'),
        'kidneys': OntId('FMA:7203'),
        'largeintestine': OntId('FMA:7201'),
        'liver': OntId('FMA:7197'),
        'lung': OntId('FMA:7195'),
        'malerepro': OntId('UBERON:0000079'),
        #'othertargets': OntId(''),
        'pancreas': OntId('FMA:7198'),
        'smallintestine': OntId('FMA:7200'),
        'spleen': OntId('FMA:7196'),
        'stomach': OntId('FMA:7148'),
        'vagus nerve': OntId('FMA:5731'),
        #'uterus': OntId('')
        '': None,
    }

    cache = auth.get_path('cache-path') / 'sparc-award-by-organ.json'
    old_cache = auth.get_path('cache-path') / 'award-mappings-old-to-new.json'

    def __init__(self,
                 path=config.organ_html_path,
                 organs_sheet=None):  # FIXME bad passing in organs
        from bs4 import BeautifulSoup
        self._BeautifulSoup = BeautifulSoup
        self.path = path
        if not self.cache.exists():
            self.overview()
            with open(self.cache, 'wt') as f:
                json.dump(self.normalized, f)

            with open(self.old_cache, 'wt') as f:
                json.dump(self.former_to_current, f)
        else:
            with open(self.cache, 'rt') as f:
                self.normalized = json.load(f)

            with open(self.old_cache, 'rt') as f:
                self.former_to_current = json.load(f)

        if organs_sheet is not None:
            self._org = organs_sheet
            bc = self._org.byCol
            self.manual = {
                # key on the award id, falling back to the manually entered award when it is missing
                award if award else (award_manual if award_manual else None):
                [OntId(t) for t in organ_term.split(' ') if t]
                for award, award_manual, organ_term in zip(
                    bc.award, bc.award_manual, bc.organ_term) if organ_term
            }
        else:
            self.manual = {}

        self.sourced = {v: k for k, vs in self.normalized.items() for v in vs}
        self.award_to_organ = {
            **self.sourced,
            **self.manual
        }  # manual override

    def overview(self):
        if self.path.exists():
            with open(self.path, 'rb') as f:
                soup = self._BeautifulSoup(f.read(), 'lxml')
        else:
            resp = requests.get(self.url)
            soup = self._BeautifulSoup(resp.content, 'lxml')

        self.raw = {}
        self.former_to_current = {}
        for bsoup in soup.find_all(
                'div', {'id': lambda v: v and v.endswith('-bubble')}):
            organ, *_rest = bsoup['id'].split('-')
            logd.debug(_rest)
            award_list = self.raw[organ] = []
            for asoup in bsoup.find_all('a'):
                href = asoup['href']
                log.debug(href)
                parts = urlparse(href)
                query = parse_qs(parts.query)
                if 'projectnumber' in query:
                    award_list.extend(query['projectnumber'])
                elif 'aid' in query:
                    #aid = [int(a) for a in query['aid']]
                    #json = self.reporter(aid)
                    award, former = self.reporter(href)
                    award_list.append(award)
                    if former is not None:
                        award_list.append(
                            former)  # for this use case this is ok
                        self.former_to_current[former] = award
                elif query:
                    log.debug(lj(query))

        self.former_to_current = {
            nml.NormAward(nml.NormAward(k)): nml.NormAward(nml.NormAward(v))
            for k, v in self.former_to_current.items()
        }
        self._normalized = {}
        self.normalized = {}
        for frm, to in ((self.raw, self._normalized),
                        (self._normalized, self.normalized)):
            for organ, awards in frm.items():
                if organ in self.organ_lookup:
                    organ = self.organ_lookup[organ].iri

                to[organ] = [nml.NormAward(a) for a in awards]

    def _reporter(self, aids):
        # can't seem to get this to cooperate
        base = ('https://api.federalreporter.nih.gov'
                '/v1/projects/FetchBySmApplIds')
        resp = requests.post(base,
                             json=aids,
                             headers={
                                 'Accept': 'application/json',
                                 'Content-Type': 'application/json'
                             })
        breakpoint()
        return resp.json()

    def reporter(self, href):
        resp = requests.get(href)
        soup = self._BeautifulSoup(resp.content, 'lxml')
        #id = soup.find_all('span', {'id': 'spnPNUMB'})
        table = soup.find_all('table', {'summary': 'Details'})
        if table:
            text = table[0].find_all('td')[1].text.strip()
            if 'Former' in text:
                award, rest = text.split(' ', 1)
                rest, former = text.rsplit(' ', 1)
                return [award, former]
            else:
                return [text, None]
        else:
            return ['', None]
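
A hedged usage sketch; the award number is a placeholder, not a real SPARC award:

od = OrganData()            # scrapes or loads the cached award -> organ mapping in __init__
organ = od('OT2ODXXXXXX')   # __call__ -> organ; logs and returns None when the award is unknown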