def populate_annos():
    from protcur import document as ptcdoc
    from protcur.core import annoSync
    from protcur.analysis import Hybrid, protc
    from hyputils.hypothesis import HypothesisHelper, group_to_memfile
    from hyputils import hypothesis as hyp
    ProtcurData.protc = protc
    group = auth.get('hypothesis-group')
    get_annos, annos, stream_thread, exit_loop = annoSync(
        group_to_memfile(group + 'sparcur'),
        helpers=(HypothesisHelper, Hybrid, protc),
        group=group,
        sync=False)

    # FIXME hack to workaround bad api key init for hyputils
    # until we can integrate orthauth
    get_annos.api_token = auth.get('hypothesis-api-key')

    annos.clear()
    annos.extend([ptcdoc.Annotation(a) for a in get_annos()])
    # reset classes in case some other class has populated them
    # (e.g. during testing) FIXME this is a bad hack
    protc.reset()
    Hybrid.reset()
    # FIXME this is expensive and slow to continually recompute
    [protc(a, annos) for a in annos]
    [Hybrid(a, annos) for a in annos]

def populate_annos(group_name='sparc-curation'):
    from hyputils import hypothesis as hyp
    if hyp.api_token == 'TOKEN':  # FIXME does not work
        hyp.api_token = auth.get('hypothesis-api-key')

    group = auth.get('hypothesis-group')
    get_annos, annos, stream_thread, exit_loop = annoSync(
        group_to_memfile(group + 'sparcur'),
        helpers=(HypothesisHelper, Hybrid, protc),
        group=group,
        sync=False)

    [protc(a, annos) for a in annos]
    [Hybrid(a, annos) for a in annos]

def latest_ir(org_id=None):
    if org_id is None:
        org_id = auth.get('blackfynn-organization')

    export = Export(auth.get_path('export-path'),
                    None, None, None,
                    latest=True,
                    org_id=org_id)
    return export.latest_ir

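# Usage sketch for latest_ir (an illustration, not part of the source): it
# assumes the sparcur auth config is populated and that at least one prior
# export already exists under export-path; printing the type is only a sanity
# check since the exact shape of the export blob is not documented here.
def _example_latest_ir_usage():  # hypothetical helper name
    blob = latest_ir()
    print(type(blob))
    return blob
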
def populate_annos():
    from protcur.core import annoSync
    from protcur.analysis import Hybrid, protc
    from hyputils.hypothesis import HypothesisHelper, group_to_memfile
    from hyputils import hypothesis as hyp
    ProtcurData.protc = protc
    group = auth.get('hypothesis-group')
    get_annos, annos, stream_thread, exit_loop = annoSync(
        group_to_memfile(group + 'sparcur'),
        helpers=(HypothesisHelper, Hybrid, protc),
        group=group,
        sync=False)

    # FIXME hack to workaround bad api key init for hyputils
    # until we can integrate orthauth
    get_annos.api_token = auth.get('hypothesis-api-key')

    annos.clear()
    annos.extend(get_annos())
    # FIXME this is expensive and slow to continually recompute
    [protc(a, annos) for a in annos]
    [Hybrid(a, annos) for a in annos]

def populate_annos():
    from protcur import document as ptcdoc
    from protcur.core import annoSync
    from protcur.analysis import Hybrid, protc
    from hyputils.hypothesis import HypothesisHelper, group_to_memfile, AnnoReader
    from hyputils import hypothesis as hyp
    ProtcurData.protc = protc
    group = auth.get('hypothesis-group')
    memfile = group_to_memfile(group + 'sparcur')
    #ar = AnnoReader(memfile, group)
    #annos = ar.get_annos()
    #ud = [a.updated for a in annos]
    #lud_before = max(ud) if ud else None
    #del ud
    get_annos, annos, stream_thread, exit_loop = annoSync(
        memfile,
        helpers=(HypothesisHelper, Hybrid, protc),
        group=group,
        sync=False)

    # FIXME hack to workaround bad api key init for hyputils
    # until we can integrate orthauth
    get_annos.api_token = auth.get('hypothesis-api-key')

    annos.clear()
    annos.extend([ptcdoc.Annotation(a) for a in get_annos()])
    #lud_after = max([a.updated for a in annos])
    # reset classes in case some other class has populated them
    # (e.g. during testing) FIXME this is a bad hack
    protc.reset()
    Hybrid.reset()
    #if lud_before == lud_after:  # no new annos
        # yeah we can use pickled ... but how :/
        #return ???
    # FIXME this is expensive and slow to continually recompute
    [protc(a, annos) for a in annos]
    [Hybrid(a, annos) for a in annos]

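# Usage sketch (an illustration, not from the source): it assumes protcur and
# hyputils are installed and that hypothesis-group and hypothesis-api-key are
# set in the sparcur auth config. Calling populate_annos() fetches the group's
# annotations and populates the protc and Hybrid helpers as a side effect.
def _example_populate_annos_usage():  # hypothetical helper name
    populate_annos()
    # one protc and one Hybrid instance is constructed per annotation;
    # downstream code reads the populated helper classes, not a return value
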
def _sparse_materialize(self, *args, sparse_limit=None):
    """ use data from the remote mark or clear datasets as sparse """
    if sparse_limit is None:
        sparse_limit = auth.get('sparse-limit')  # yay for yaml having int type

    if self.is_dataset():
        package_count = self._package_count()
        sparse_remote = (False
                         if sparse_limit is None else
                         package_count >= sparse_limit)
        sparse_cache = self.is_sparse()
        if sparse_remote:
            if not sparse_cache:
                self._mark_sparse()
        elif sparse_cache:  # strange case where number of packages decreases
            self._clear_sparse()

    else:
        msg = 'at the moment only datasets can be marked as sparse'
        raise NotImplementedError(msg)

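# A standalone sketch of the sparse decision above (names are illustrative and
# not part of the class): a dataset should be marked sparse when its package
# count meets or exceeds the configured sparse-limit, and an existing sparse
# mark is cleared if the count later drops back below the limit.
def _sparse_decision(package_count, sparse_limit, currently_sparse):
    want_sparse = (False
                   if sparse_limit is None else
                   package_count >= sparse_limit)
    if want_sparse and not currently_sparse:
        return 'mark'
    elif not want_sparse and currently_sparse:
        return 'clear'
    return 'no-op'
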
def setUpClass(cls):
    from sparcur.paths import BlackfynnCache, Path
    from sparcur.config import auth
    from sparcur.backends import BlackfynnRemote
    from sparcur.simple import pull, fetch_metadata_files, fetch_files
    test_datasets_real = auth.get_list('datasets-test')
    nosparse = [
        'N:dataset:bec4d335-9377-4863-9017-ecd01170f354',  # mbf headers
    ]
    test_datasets_real.extend(nosparse)
    slot = auth._pathit('{:user-cache-path}/sparcur/objects-temp')
    # FIXME slot needs to be handled transparently as an LRU cache
    # that has multiple folder levels and stores only by uuid
    # and probably lives in '{:user-cache-path}/sparcur/objects'
    slot = slot if slot.exists() else None
    cls.organization_id = auth.get('blackfynn-organization')
    cls.BlackfynnRemote = BlackfynnRemote._new(Path, BlackfynnCache)
    cls.BlackfynnRemote.init(cls.organization_id)
    cls.anchor = cls.BlackfynnRemote.smartAnchor(path_project_container)
    cls.anchor.local_data_dir_init(symlink_objects_to=slot)
    cls.project_path = cls.anchor.local
    list(cls.anchor.children)  # side effect to retrieve top level folders
    datasets = list(cls.project_path.children)
    cls.test_datasets = [d for d in datasets
                         if d.cache_id in test_datasets_real]
    [d.rmdir() for d in datasets
     if d.cache_id not in test_datasets_real]  # for sanity

    if not RealDataHelper._fetched:
        RealDataHelper._fetched = True  # if we fail we aren't going to try again
        [d._mark_sparse() for d in cls.test_datasets
         if d.cache_id not in nosparse]  # keep pulls fastish
        pull.from_path_dataset_file_structure_all(cls.project_path,
                                                  paths=cls.test_datasets)
        fetch_metadata_files.main(cls.project_path)
        fetch_files.main(cls.project_path)

self.last_export_updated_datetime = None
self.queued = False
self.fetching = False
self.exporting = False
self.last_export_failed = None

defaults = {o.name: o.value if o.argcount else None
            for o in parse_defaults(clidoc)}
args = {**defaults,
        'export': True,
        '--jobs': 1,
        'schemas': False,
        'protcur': False,
        # XXX FIXME we need a way to fetch the data once and then reuse
        '--no-network': True,
        '--i-know-what-i-am-doing': True,
        'report': False,
        'protocols': False,}  # FIXME separate args for protcur export
options = Options(args, defaults)

project_id = auth.get('remote-organization')
# FIXME hardcoded XXX resolve required to avoid mismatches
path_source_dir = Path('~/files/sparc-datasets-test').expanduser().resolve()
if not path_source_dir.exists():
    path_source_dir.mkdir(parents=True)

cel = Celery('sparcur-cron',)
cel.conf.worker_hijack_root_logger = False
cel.conf.worker_prefetch_multiplier = 1

log.info(f'STATUS sparcur :id {project_id} :path {path_source_dir}')

# FIXME needed a dedicated worker for the cron queue
cel.conf.task_queues = (
    Queue('cron', Exchange('cron'), routing_key='task.cron',
          #max_priority=100,