Example 1
def curation_export_published(export_path, out_base=None):
    """ Write curation-export-published.ttl for an existing export path
        by merging curation-export.ttl with the per-dataset graphs """
    p = Path(export_path).expanduser().resolve()
    ce = OntResPath(p / 'curation-export.ttl')
    orps = [
        OntResPath(_) for _ in (p / 'datasets').children if _.suffix == '.ttl'
    ]
    graphs = [o.graph for o in orps]

    merged = _populate_published(ce, graphs)

    op = p if out_base is None else Path(out_base)
    merged.write(op / 'curation-export-published.ttl')
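A minimal usage sketch (not part of the source): assuming export_path points at an existing sparcur export directory that contains curation-export.ttl and a datasets/ folder of per-dataset .ttl files, the helper above could be called like this; both paths are placeholders.

# hypothetical invocation; the paths are illustrative, not real export locations
curation_export_published('~/path/to/export', out_base='/tmp/published-export')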
Example 2
 def setUp(self):
     Path._cache_class = BlackfynnCache
     self.project_path = Path(project_path)  # FIXME common overwrites?
     self.anchor = self.project_path.cache
     BlackfynnCache.setup(Path, BlackfynnRemoteFactory)
     self.anchor.remote  # trigger creation of _remote_class
     self.BlackfynnRemote = BlackfynnCache._remote_class
Example 3
    def __new__(cls, cache_anchor, local_class, host):
        # TODO decouple _new from init here as well
        session = pxssh.pxssh(options=dict(
            IdentityAgent=os.environ.get('SSH_AUTH_SOCK')))
        session.login(host,
                      ssh_config=Path('~/.ssh/config').expanduser().as_posix())
        cls._rows = 200
        cls._cols = 200
        session.setwinsize(cls._rows,
                           cls._cols)  # prevent linewraps of long commands
        session.prompt()
        atexit.register(lambda: (session.sendeof(), session.close()))
        cache_class = cache_anchor.__class__
        newcls = super().__new__(cls,
                                 local_class,
                                 cache_class,
                                 host=host,
                                 session=session)
        newcls._uid, *newcls._gids = [
            int(i) for i in (
                newcls._ssh('echo $(id -u) $(id -G)').decode().split(' '))
        ]

        newcls._cache_anchor = cache_anchor
        # must run before we can get the sysid, which is a bit odd
        # given that we don't actually sandbox the filesystem
        newcls._bind_sysid()

        return newcls
Example 4
    def __init__(self, path):
        self._errors = []
        if isinstance(path, str):
            path = Path(path)

        if not hasattr(self, 'path'):
            self.path = path
Example 5
def populate_existing_redis(conn):
    """ Set the initial state for exports from the file system. """
    # we intentionally do not go to network here because that will
    # be done by check_for_updates
    datasets_export_base = Path(options.export_path) / 'datasets'
    uuids = [c.name for c in datasets_export_base.children if c.is_dir()]
    for uuid in uuids:
        dataset_id = 'N:dataset:' + uuid
        try:
            # catch potentially malformed ids
            did = PennsieveId(dataset_id)
        except idlib.exc.MalformedIdentifierError as e:
            log.error(f'strange dir in dataset export: {uuid}\n{e}')
            continue

        # FIXME hardcoded convention
        latest = (datasets_export_base /
                  uuid / 'LATEST' / 'curation-export.json')
        if latest.exists():
            with open(latest, 'rt') as f:
                # we don't bother to use fromJson here because we just
                # need the raw values not the sparcur ir
                blob = json.load(f)
            updated = blob['meta']['timestamp_updated']
            #prov_commit = blob['prov']['commit']  # TODO need to be able to detect software changes and rerun
            sid = 'state-' + dataset_id
            uid = 'updated-' + dataset_id
            fid = 'failed-' + dataset_id
            conn.set(sid, _none)
            conn.set(uid, updated)
            conn.set(fid, '')

    log.info(pprint.pformat({k:conn.get(k) for k in
                             sorted(conn.keys()) if b'N:dataset' in k},
                            width=120))
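A hedged sketch of seeding this state at worker startup, assuming the standard redis-py client; options, log, and _none come from the surrounding module and are not shown here.

import redis  # assumption: redis-py is the client being used

conn = redis.Redis(host='localhost', port=6379, db=0)  # placeholder connection details
populate_existing_redis(conn)  # replay the on-disk export state into redis before polling begins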
Example 6
 def run_reasoner(self):
     graph = self._mis_graph()
     expanded_graph = self._mis_graph()
     [(graph.add(t), expanded_graph.add(t)) for t in self.triples()]
     closure = rdfc.OWLRL_Semantics
     rdfc.DeductiveClosure(closure).expand(expanded_graph)
     with open(Path(config.cache_dir, 'reasoned-curation-export.ttl'),
               'wb') as f:
         f.write(expanded_graph.serialize(format='nifttl'))
Example 7
    def __init__(
        self,
        export_path,
        export_source_path,
        folder_timestamp,
        timestamp,
        latest=False,
        partial=False,
        open_when_done=False,
        org_id=None,
        export_protcur_base=None,
        export_base=None,
    ):
        if org_id is None:
            self.export_source_path = export_source_path
            id = export_source_path.cache.anchor.identifier.uuid
        else:
            # do not set export_source_path, to prevent accidental export
            id = BlackfynnId(org_id).uuid

        self.export_path = Path(export_path)
        self.export_base = (export_base if export_base is not None else Path(
            export_path, id, self.export_type))
        self.latest = latest
        self.partial = partial
        self.folder_timestamp = folder_timestamp
        self.timestamp = timestamp
        self.open_when_done = open_when_done
        self.export_protcur_base = export_protcur_base  # pass in as export_base

        self._args = dict(
            export_path=export_path,
            export_source_path=export_source_path,
            folder_timestamp=folder_timestamp,
            timestamp=timestamp,
            latest=latest,
            partial=partial,
            open_when_done=open_when_done,
            org_id=org_id,
            export_protcur_base=export_protcur_base,
            export_base=export_base,
        )
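A hypothetical construction call; 'Exporter' is a stand-in for whatever class defines this __init__, and the ids, paths, and timestamps below are placeholders. Passing org_id keeps export_source_path unset, which is the guard against accidental export noted in the comment above.

# hypothetical; every value below is illustrative
exporter = Exporter(
    export_path='/tmp/export',
    export_source_path=None,   # only read when org_id is None, so None is safe here
    folder_timestamp='2021-01-01T00_00_00,000000-00_00',
    timestamp='2021-01-01T00:00:00,000000-00:00',
    org_id='N:organization:fake-organization-id',
)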
Example 8
 def _mis_graph(self):
     """ for now easier to just get a fresh one, they are small """
     glb = pauth.get_path('git-local-base')
     olr = Path(glb / 'duplicates' / 'sparc-NIF-Ontology')
     graph = (rdflib.ConjunctiveGraph()
         .parse((olr / 'ttl/sparc-methods.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-core.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-helper.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods.ttl').as_posix(), format='turtle')
     )
     return graph
Example 9
 def _mis_graph(self):
     """ for now easier to just get a fresh one, they are small """
     olr = Path(
         devconfig.git_local_base) / 'duplicates' / 'sparc-NIF-Ontology'
     graph = (
         rdflib.ConjunctiveGraph().parse(
             (olr / 'ttl/sparc-methods.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-core.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-helper.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods.ttl').as_posix(), format='turtle')
     )
     return graph
Example 10
 def setUp(self):
     super().setUp(init_cache=False)
     hostname = gethostname()
     SshCache._local_class = Path
     Path.setup(SshCache, SshRemoteFactory
                )  # if this doesn't break something I will be surprised
     project_path = Path(self.test_path)
     self.project_path = project_path
     remote_root = PurePath(Path(__file__).parent)  # the 'remote' target
     remote_id = remote_root.as_posix()
     anchor = project_path.cache_init(
         remote_id, anchor=True
     )  # this_folder.meta is sort of one extra level of host keys
     # FIXME remote_root doesn't actually work for ssh remotes, it is always '/'
     #anchor = project_path.cache_init('/')  # this_folder.meta is sort of one extra level of host keys
     try:
         self.SshRemote = SshRemoteFactory(anchor, Path, hostname)
     except TypeError:  # pxssh fail
         self.SshRemote = SshRemoteFactory(anchor, Path,
                                           hostname + '-local')
     self.this_file = Path(__file__)
     self.this_file_darkly = self.SshRemote(__file__)
     tfd_cache = self.this_file_darkly.cache_init()
Example 11
    def test_meta(self):
        #hrm = this_file_darkly.meta.__dict__, this_file_darkly.local.meta.__dict__
        #assert hrm[0] == hrm[1]
        rm = self.this_file_darkly.meta
        lm = Path(__file__).meta

        rmnid = {k: v for k, v in rm.items() if k != 'id'}
        lmnid = {k: v for k, v in lm.items() if k != 'id'}
        bads = []
        for k, rv in rmnid.items():
            lv = lmnid[k]
            if rv != lv:
                bads.append((lv, rv))

        assert not bads, bads
Example 12
 def __init__(
     self,
     export_path,
     export_source_path,
     folder_timestamp,
     timestamp,
     latest=False,
     partial=False,
     open_when_done=False,
 ):
     self.export_source_path = export_source_path
     self.export_base = Path(export_path,
                             export_source_path.cache.anchor.id)
     self.latest = latest
     self.partial = partial
     self.folder_timestamp = folder_timestamp
     self.timestamp = timestamp
     self.open_when_done = open_when_done
Example 13
    def __init__(self,
                 export_path,
                 export_source_path,
                 folder_timestamp,
                 timestamp,
                 latest=False,
                 partial=False,
                 open_when_done=False,
                 org_id=None):
        if org_id is None:
            self.export_source_path = export_source_path
            id = export_source_path.cache.anchor.id
        else:
            # do not set export_source_path, to prevent accidental export
            id = org_id

        self.export_base = Path(export_path, id, self.export_type)
        self.latest = latest
        self.partial = partial
        self.folder_timestamp = folder_timestamp
        self.timestamp = timestamp
        self.open_when_done = open_when_done
Example 14
        self.queued = False
        self.fetching = False
        self.exporting = False
        self.last_export_failed = None


defaults = {o.name:o.value if o.argcount else None
            for o in parse_defaults(clidoc)}
args = {**defaults, 'export': True, '--jobs': 1, 'schemas': False, 'protcur': False,
        '--no-network': True,  # XXX FIXME we need a way to fetch the data once and then reuse
        '--i-know-what-i-am-doing': True,
        'report': False, 'protocols': False,}  # FIXME separate args for protcur export
options = Options(args, defaults)

project_id = auth.get('remote-organization')
path_source_dir = Path('~/files/sparc-datasets-test').expanduser().resolve()  # FIXME hardcoded  XXX resolve required to avoid mismatches
if not path_source_dir.exists():
    path_source_dir.mkdir(parents=True)

cel = Celery('sparcur-cron',)

cel.conf.worker_hijack_root_logger = False
cel.conf.worker_prefetch_multiplier = 1

log.info(f'STATUS sparcur :id {project_id} :path {path_source_dir}')

# FIXME needed a dedicated worker for the cron queue
cel.conf.task_queues = (
    Queue('cron', Exchange('cron'), routing_key='task.cron',
          #max_priority=100,
          queue_arguments={'x-max-priority': 10},
Example 15
def write_graphs(sgs, path=None):
    if path is None:
        path = Path(tempfile.tempdir) / 'protcur-individual'

    if not path.exists():
        path.mkdir()

    pp = path / 'published'
    if not pp.exists():
        pp.mkdir()

    hpath = path / 'html'
    if not hpath.exists():
        hpath.mkdir()

    hpp = hpath / 'published'
    if not hpp.exists():
        hpp.mkdir()

    opath = path / 'org'
    if not opath.exists():
        opath.mkdir()

    opp = opath / 'published'
    if not opp.exists():
        opp.mkdir()

    for wg in sgs:
        u = next(wg[:rdf.type:sparc.Protocol])
        published = bool(list(wg[u:TEMP.datasetPublishedDoi:]))
        try:
            pid = idlib.Pio(u)
            base = 'pio-' + pid.identifier.suffix
        except idlib.exc.IdlibError as e:
            pid = None
            base = (u
                    .replace('http://', '')
                    .replace('https://', '')
                    .replace('/', '_')
                    .replace('.', '_'))

        name = base + '.ttl'
        hname = base + '.html'
        oname = base + '.org'

        if published:
            wt_path = pp / name
            wh_path = hpp / hname
            wo_path = opp / oname
        else:
            wt_path = path / name
            wh_path = hpath / hname
            wo_path = opath / oname

        wg.write(wt_path)
        write_html(wg, wh_path)

        if pid is None:
            org = None
        else:
            #if wo_path.exists(): continue  # XXX remove after testing complete
            try:
                org = pid.asOrg()
            except idlib.exc.IdlibError as e:
                org = None

        if org is not None:
            with open(wo_path, 'wt') as f:
                f.write(org)
Example 16
import shutil
from pathlib import PurePosixPath
from datetime import datetime
from sparcur import config
from sparcur import exceptions as exc
from sparcur.paths import Path
from sparcur.paths import LocalPath, PrimaryCache, RemotePath
from sparcur.paths import XattrCache, SymlinkCache
from sparcur.state import State
from sparcur.pathmeta import PathMeta
from sparcur.datasets import Version1Header
from sparcur.curation import PathData, Integrator
from sparcur.blackfynn_api import FakeBFLocal
this_file = Path(__file__)
template_root = this_file.parent.parent / 'resources/DatasetTemplate'
print(template_root)
project_path = this_file.parent / 'test_local/test_project'
test_organization = 'N:organization:ba06d66e-9b03-4e3d-95a8-649c30682d2d'
test_dataset = 'N:dataset:5d167ba6-b918-4f21-b23d-cdb124780da1'

PathData.project_path = project_path

osk = Version1Header.skip_cols  # save original skips
Version1Header.skip_cols = tuple(
    _ for _ in osk if _ != 'example')  # use the example values for tests

ds_folders = 'ds1', 'ds2', 'ds3', 'ds4'
ds_roots = (
    'ds1',
    'ds2/ds2',
    'ds3/oops',
Example 17
import os
import shutil
from pathlib import PurePosixPath
from datetime import datetime
from augpathlib import PathMeta
from augpathlib.utils import onerror_windows_readwrite_remove
from sparcur import config
from sparcur import exceptions as exc
from sparcur.paths import Path
from sparcur.paths import LocalPath, PrimaryCache, RemotePath
from sparcur.paths import SymlinkCache
from sparcur.state import State
from sparcur.datasets import Version1Header
from sparcur.curation import PathData, Integrator
from sparcur.blackfynn_api import FakeBFLocal
this_file = Path(__file__)
template_root = this_file.parent.parent / 'resources/DatasetTemplate'
print(template_root)
project_path = this_file.parent / 'test_local/test_project'
fake_organization = 'N:organization:fake-organization-id'
project_path_real = this_file.parent / 'test_local/UCSD'
test_organization = 'N:organization:ba06d66e-9b03-4e3d-95a8-649c30682d2d'
test_dataset = 'N:dataset:5d167ba6-b918-4f21-b23d-cdb124780da1'

onerror = onerror_windows_readwrite_remove if os.name == 'nt' else None

osk = Version1Header.skip_cols  # save original skips
Version1Header.skip_cols = tuple(_ for _ in osk if _ != 'example')  # use the example values for tests

ds_roots = (
    'ds1',
Example 18
import os
from tempfile import gettempdir
from pathlib import PurePosixPath
from datetime import datetime
import pytest
from augpathlib import PathMeta
from augpathlib.utils import onerror_windows_readwrite_remove
from sparcur import config
from sparcur import exceptions as exc
from sparcur.paths import Path
from sparcur.paths import LocalPath, PrimaryCache
from sparcur.paths import SymlinkCache
from sparcur.state import State
from sparcur.datasets import DatasetDescriptionFile
from sparcur.curation import PathData, Integrator
from sparcur.blackfynn_api import FakeBFLocal
this_file = Path(__file__).resolve()  # ARGH PYTHON ARGH NO LOL BAD PYTHON
examples_root = this_file.parent / 'examples'
template_root = this_file.parent.parent / 'resources/DatasetTemplate'
print(template_root)
_pid = os.getpid()
path_project_container = this_file.parent / f'test_local-{_pid}'
project_path = path_project_container / 'test_project'
fake_organization = 'N:organization:fake-organization-id'
project_path_real = path_project_container / 'UCSD'
test_organization = 'N:organization:ba06d66e-9b03-4e3d-95a8-649c30682d2d'
test_dataset = 'N:dataset:5d167ba6-b918-4f21-b23d-cdb124780da1'
temp_path = Path(gettempdir(), f'.sparcur-testing-base-{_pid}')

onerror = onerror_windows_readwrite_remove if os.name == 'nt' else None

SKIP_NETWORK = ('SKIP_NETWORK' in os.environ or 'FEATURES' in os.environ
Example 19
 def title(self):
     path = Path(self.path)
     return f'{path.name} {path.cache.dataset.name[:30]} ...'
Example 20
class OrganData:
    """ retrieve SPARC investigator data """

    url = ('https://commonfund.nih.gov/sites/default/'
           'files/sparc_nervous_system_graphic/main.html')

    def organ(self, award_number):
        if award_number in self.manual and award_number not in self.sourced:
            log.warning(f'used manual organ mapping for {award_number}')
        try:
            return self.award_to_organ[award_number]
        except KeyError as e:
            logd.error(f'bad award_number {award_number}')

    __call__ = organ

    organ_lookup = {
        'bladder': OntId('FMA:15900'),
        'brain': OntId('UBERON:0000955'),
        #'computer': OntId(''),
        'heart': OntId('FMA:7088'),
        'kidneys': OntId('FMA:7203'),
        'largeintestine': OntId('FMA:7201'),
        'liver': OntId('FMA:7197'),
        'lung': OntId('FMA:7195'),
        'malerepro': OntId('UBERON:0000079'),
        #'othertargets': OntId(''),
        'pancreas': OntId('FMA:7198'),
        'smallintestine': OntId('FMA:7200'),
        'spleen': OntId('FMA:7196'),
        'stomach': OntId('FMA:7148'),
        'vagus nerve': OntId('FMA:5731'),
        #'uterus': OntId('')
        '': None,
    }

    cache = Path(config.cache_dir, 'sparc-award-by-organ.json')
    old_cache = Path(config.cache_dir, 'award-mappings-old-to-new.json')

    def __init__(self,
                 path=config.organ_html_path,
                 organs_sheet=None):  # FIXME bad passing in organs
        self.path = path
        if not self.cache.exists():
            self.overview()
            with open(self.cache, 'wt') as f:
                json.dump(self.normalized, f)

            with open(self.old_cache, 'wt') as f:
                json.dump(self.former_to_current, f)
        else:
            with open(self.cache, 'rt') as f:
                self.normalized = json.load(f)

            with open(self.old_cache, 'rt') as f:
                self.former_to_current = json.load(f)

        if organs_sheet is not None:
            self._org = organs_sheet
            bc = self._org.byCol
            self.manual = {
                award if award else (award_manual if award_manual else None):
                [OntId(t) for t in organ_term.split(' ') if t]
                for award, award_manual, organ_term in zip(
                    bc.award, bc.award_manual, bc.organ_term) if organ_term
            }
        else:
            self.manual = {}

        self.sourced = {v: k for k, vs in self.normalized.items() for v in vs}
        self.award_to_organ = {
            **self.sourced,
            **self.manual
        }  # manual override

    def overview(self):
        if self.path.exists():
            with open(self.path, 'rb') as f:
                soup = BeautifulSoup(f.read(), 'lxml')
        else:
            resp = requests.get(self.url)
            soup = BeautifulSoup(resp.content, 'lxml')

        self.raw = {}
        self.former_to_current = {}
        for bsoup in soup.find_all(
                'div', {'id': lambda v: v and v.endswith('-bubble')}):
            organ, _ = bsoup['id'].split('-')
            award_list = self.raw[organ] = []
            for asoup in bsoup.find_all('a'):
                href = asoup['href']
                log.debug(href)
                parts = urlparse(href)
                query = parse_qs(parts.query)
                if 'projectnumber' in query:
                    award_list.extend(query['projectnumber'])
                elif 'aid' in query:
                    #aid = [int(a) for a in query['aid']]
                    #json = self.reporter(aid)
                    award, former = self.reporter(href)
                    award_list.append(award)
                    if former is not None:
                        award_list.append(
                            former)  # for this usecase this is ok
                        self.former_to_current[former] = award
                elif query:
                    log.debug(lj(query))

        self.former_to_current = {
            nml.NormAward(nml.NormAward(k)): nml.NormAward(nml.NormAward(v))
            for k, v in self.former_to_current.items()
        }
        self._normalized = {}
        self.normalized = {}
        for frm, to in ((self.raw, self._normalized), (self._normalized,
                                                       self.normalized)):
            for organ, awards in frm.items():
                if organ in self.organ_lookup:
                    organ = self.organ_lookup[organ].iri

                to[organ] = [nml.NormAward(a) for a in awards]

    def _reporter(self, aids):
        # can't seem to get this to cooperate
        base = ('https://api.federalreporter.nih.gov'
                '/v1/projects/FetchBySmApplIds')
        resp = requests.post(base,
                             json=aids,
                             headers={
                                 'Accept': 'application/json',
                                 'Content-Type': 'application/json'
                             })
        breakpoint()
        return resp.json()

    def reporter(self, href):
        resp = requests.get(href)
        soup = BeautifulSoup(resp.content, 'lxml')
        #id = soup.find_all('span', {'id': 'spnPNUMB'})
        table = soup.find_all('table', {'summary': 'Details'})
        text = table[0].find_all('td')[1].text.strip()
        if 'Former' in text:
            award, rest = text.split(' ', 1)
            rest, former = text.rsplit(' ', 1)
            return [award, former]
        else:
            return [text, None]
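A hypothetical lookup using the class above, assuming config.organ_html_path and config.cache_dir are configured as in sparcur; the award number is a placeholder, not a real mapping.

od = OrganData()             # builds the award-to-organ cache, or loads it from disk
organ = od('OT2OD000000')    # placeholder award number; returns the mapped organ or None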
Example 21
class ProtocolData(dat.HasErrors):
    # this class is best used as a helper class not as a __call__ class

    def __init__(self, id=None):  # FIXME lots of ways to use this class ...
        self.id = id  # still needed for the converters use case :/
        super().__init__(pipeline_stage=self.__class__)

    def protocol(self, uri):
        return self._get_protocol_json(uri)

    __call__ = protocol

    @classmethod
    def setup(cls, creds_file=None):
        if creds_file is None:
            try:
                creds_file = devconfig.secrets('protocols-io', 'api',
                                               'creds-file')
            except KeyError as e:
                raise TypeError('creds_file is a required argument'
                                ' unless you have it in secrets') from e
        _pio_creds = get_protocols_io_auth(creds_file)
        cls._pio_header = QuietDict(
            {'Authorization': 'Bearer ' + _pio_creds.access_token})

    @classmethod
    def cache_path(cls):
        return config.protocol_cache_path

    @property
    def protocol_uris_resolved(self):
        if not hasattr(self, '_c_protocol_uris_resolved'):
            self._c_protocol_uris_resolved = list(self._protocol_uris_resolved)

        return self._c_protocol_uris_resolved

    @property
    def _protocol_uris_resolved(self):
        # FIXME quite slow ...
        for start_uri in self.protocol_uris:
            log.debug(start_uri)
            # for/else with no break: the else branch always runs once the
            # chain is exhausted, yielding the final uri in the resolution chain
            for end_uri in resolution_chain(start_uri):
                pass
            else:
                yield end_uri

    @property
    def protocol_annotations(self):
        for uri in self.protocol_uris_resolved:
            yield from protc.byIri(uri, prefix=True)

    @property
    def protocol_jsons(self):
        for uri in self.protocol_uris_resolved:
            yield self._get_protocol_json(uri)

    @cache(Path(config.cache_dir, 'protocol_json'))
    def _get_protocol_json(self, uri):
        #juri = uri + '.json'
        logd.info(uri)
        pi = get_right_id(uri)
        if 'protocols.io' in pi:
            pioid = pi.slug  # FIXME normalize before we ever get here ...
            log.info(pioid)
        else:
            msg = f'protocol uri is not from protocols.io {pi} {self.id}'
            logd.error(msg)
            self.addError(msg)
            return

        #uri_path = uri.rsplit('/', 1)[-1]
        apiuri = 'https://protocols.io/api/v3/protocols/' + pioid
        #'https://www.protocols.io/api/v3/groups/sparc/protocols'
        #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
        #print(apiuri, header)
        log.debug('going to network for protocols')
        resp = requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')
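A hedged sketch of driving ProtocolData directly, assuming protocols.io API credentials are available via the devconfig secrets; the dataset id and protocol uri are placeholders.

ProtocolData.setup()  # raises TypeError if no creds-file is passed or found in secrets
pd = ProtocolData(id='N:dataset:placeholder')
blob = pd('https://www.protocols.io/view/example-protocol')  # placeholder uri; returns the api json blob or None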
Example 22
 def setUp(self):
     self.ds = [dat.DatasetStructureLax(p) for p in Path(project_path).children]
Example 23
        return self._data_cache

    @hasSchema.f(sc.SummarySchema, fail=True)
    def data(self, timestamp=None):
        data = self._pipeline_end(timestamp)
        return data  # FIXME we want objects that wrap the output rather than generate it ...

    @hasSchema.f(sc.SummarySchema, fail=True)
    def data_for_export(self, timestamp):
        data = self._pipeline_end(timestamp)
        # NOTE this timestamps the cached data AS INTENDED
        data['prov']['timestamp_export_start'] = timestamp
        return data


_p = Path(tempfile.gettempdir()) / 'asdf'
_p.mkdir(exist_ok=True)  # FIXME XXXXXXXXXXXXXXXXXXXXXXXXXX


def datame(d,
           ca,
           timestamp,
           helpers=None,
           log_level=logging.INFO,
           dp=_p,
           evil=[False],
           dumb=False):
    """ sigh, pickles """
    log_names = ('sparcur', 'idlib', 'protcur', 'orthauth', 'ontquery',
                 'augpathlib', 'pyontutils')
    for log_name in log_names:
Example 24
import os
import shutil
from tempfile import gettempdir
from pathlib import PurePosixPath
from datetime import datetime
from augpathlib import PathMeta
from augpathlib.utils import onerror_windows_readwrite_remove
from sparcur import config
from sparcur import exceptions as exc
from sparcur.paths import Path
from sparcur.paths import LocalPath, PrimaryCache, RemotePath
from sparcur.paths import SymlinkCache
from sparcur.state import State
from sparcur.datasets import DatasetDescriptionFile
from sparcur.curation import PathData, Integrator
from sparcur.blackfynn_api import FakeBFLocal
this_file = Path(__file__)
examples_root = this_file.parent / 'examples'
template_root = this_file.parent.parent / 'resources/DatasetTemplate'
print(template_root)
project_path = this_file.parent / 'test_local/test_project'
fake_organization = 'N:organization:fake-organization-id'
project_path_real = this_file.parent / 'test_local/UCSD'
test_organization = 'N:organization:ba06d66e-9b03-4e3d-95a8-649c30682d2d'
test_dataset = 'N:dataset:5d167ba6-b918-4f21-b23d-cdb124780da1'
temp_path = Path(gettempdir(), f'.sparcur-testing-base-{os.getpid()}')

onerror = onerror_windows_readwrite_remove if os.name == 'nt' else None

ddih = DatasetDescriptionFile.ignore_header  # save original skips
DatasetDescriptionFile.ignore_header = tuple(
    _ for _ in ddih if _ != 'example')  # use the example values for tests
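A hedged sketch of undoing the patch above once the tests finish, e.g. from an autouse pytest fixture; pytest is an assumption here (the sibling test modules already use it), and ddih is the saved original defined above.

import pytest  # assumption: pytest drives these tests

@pytest.fixture(scope='session', autouse=True)
def _restore_ignore_header():
    yield  # run the whole test session with the patched ignore_header
    DatasetDescriptionFile.ignore_header = ddih  # restore the original afterwards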