Example #1
    def populate_annos():
        from protcur import document as ptcdoc
        from protcur.core import annoSync
        from protcur.analysis import Hybrid, protc
        from hyputils.hypothesis import HypothesisHelper, group_to_memfile
        from hyputils import hypothesis as hyp
        ProtcurData.protc = protc
        group = auth.get('hypothesis-group')
        get_annos, annos, stream_thread, exit_loop = annoSync(
            group_to_memfile(group + 'sparcur'),
            helpers=(HypothesisHelper, Hybrid, protc),
            group=group,
            sync=False)

        # FIXME hack to work around bad api key init for hyputils until we can integrate orthauth
        get_annos.api_token = auth.get('hypothesis-api-key')
        annos.clear()
        annos.extend([ptcdoc.Annotation(a) for a in get_annos()])

        # reset classes in case some other class has populated them
        # (e.g. during testing) FIXME this is a bad hack
        protc.reset()
        Hybrid.reset()

        # FIXME this is expensive and slow to continually recompute
        # instantiating the helpers for each annotation populates their class-level registries
        [protc(a, annos) for a in annos]
        [Hybrid(a, annos) for a in annos]
Example #2
    def populate_annos(group_name='sparc-curation'):  # NOTE group_name is currently unused
        from protcur.core import annoSync
        from protcur.analysis import Hybrid, protc
        from hyputils.hypothesis import HypothesisHelper, group_to_memfile
        from hyputils import hypothesis as hyp
        if hyp.api_token == 'TOKEN':  # FIXME does not work
            hyp.api_token = auth.get('hypothesis-api-key')

        group = auth.get('hypothesis-group')
        get_annos, annos, stream_thread, exit_loop = annoSync(group_to_memfile(group + 'sparcur'),
                                                              helpers=(HypothesisHelper, Hybrid, protc),
                                                              group=group,
                                                              sync=False)

        [protc(a, annos) for a in annos]
        [Hybrid(a, annos) for a in annos]
Example #3
def latest_ir(org_id=None):
    if org_id is None:
        org_id = auth.get('blackfynn-organization')

    export = Export(auth.get_path('export-path'),
                    None,  # positional placeholders (unused here)
                    None,
                    None,
                    latest=True,
                    org_id=org_id)

    return export.latest_ir
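
A minimal usage sketch, assuming latest_ir is importable and the auth configuration above is in place; the explicit organization id below is a hypothetical placeholder, not a real identifier:

    # default: organization comes from auth.get('blackfynn-organization')
    ir = latest_ir()
    # explicit override; this id is a made-up example
    ir_other = latest_ir(org_id='N:organization:0000')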
Example #4
    def populate_annos():
        from protcur.core import annoSync
        from protcur.analysis import Hybrid, protc
        from hyputils.hypothesis import HypothesisHelper, group_to_memfile
        from hyputils import hypothesis as hyp
        ProtcurData.protc = protc
        group = auth.get('hypothesis-group')
        get_annos, annos, stream_thread, exit_loop = annoSync(
            group_to_memfile(group + 'sparcur'),
            helpers=(HypothesisHelper, Hybrid, protc),
            group=group,
            sync=False)

        # FIXME hack to work around bad api key init for hyputils until we can integrate orthauth
        get_annos.api_token = auth.get('hypothesis-api-key')
        annos.clear()
        annos.extend(get_annos())

        # FIXME this is expensive and slow to continually recompute
        [protc(a, annos) for a in annos]
        [Hybrid(a, annos) for a in annos]
Example #5
    def populate_annos():
        from protcur import document as ptcdoc
        from protcur.core import annoSync
        from protcur.analysis import Hybrid, protc
        from hyputils.hypothesis import HypothesisHelper, group_to_memfile, AnnoReader
        from hyputils import hypothesis as hyp
        ProtcurData.protc = protc
        group = auth.get('hypothesis-group')
        memfile = group_to_memfile(group + 'sparcur')
        #ar = AnnoReader(memfile, group)
        #annos = ar.get_annos()
        #ud = [a.updated for a in annos]
        #lud_before = max(ud) if ud else None
        #del ud
        get_annos, annos, stream_thread, exit_loop = annoSync(
            memfile,
            helpers=(HypothesisHelper, Hybrid, protc),
            group=group,
            sync=False)

        # FIXME hack to work around bad api key init for hyputils until we can integrate orthauth
        get_annos.api_token = auth.get('hypothesis-api-key')
        annos.clear()
        annos.extend([ptcdoc.Annotation(a) for a in get_annos()])
        #lud_after = max([a.updated for a in annos])

        # reset classes in case some other class has populated them
        # (e.g. during testing) FIXME this is a bad hack
        protc.reset()
        Hybrid.reset()

        #if lud_before == lud_after:  # no new annos
        # yeah we can use pickled ... but how :/
        #return ???

        # FIXME this is expensive and slow to continually recompute
        [protc(a, annos) for a in annos]
        [Hybrid(a, annos) for a in annos]
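
The commented-out lud_before/lud_after bookkeeping above gestures at skipping the expensive recomputation when no annotations have changed. A minimal sketch of one way the pickled reuse could work; the helper and its cache path are hypothetical, not part of protcur or sparcur:

    import pickle
    from pathlib import Path

    def _load_cached_if_unchanged(cache_path, lud_before, lud_after):
        # hypothetical helper: return previously pickled results when the
        # latest 'updated' timestamp has not moved, otherwise None
        cache = Path(cache_path)
        if lud_before is not None and lud_before == lud_after and cache.exists():
            with open(cache, 'rb') as f:
                return pickle.load(f)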
Example #6
    def _sparse_materialize(self, *args, sparse_limit=None):
        """ use data from the remote mark or clear datasets as sparse """
        if sparse_limit is None:
            sparse_limit = auth.get(
                'sparse-limit')  # yay for yaml having int type

        if self.is_dataset():
            package_count = self._package_count()
            sparse_remote = (False if sparse_limit is None else
                             package_count >= sparse_limit)
            sparse_cache = self.is_sparse()
            if sparse_remote:
                if not sparse_cache:
                    self._mark_sparse()
            elif sparse_cache:  # strange case where number of packages decreases
                self._clear_sparse()

        else:
            msg = 'at the moment only datasets can be marked as sparse'
            raise NotImplementedError(msg)
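
A usage sketch under the assumption that the method is called on a dataset-level path object; the variable name and the override value are illustrative only:

    # hypothetical call site; dataset_cache is assumed to be a dataset object
    dataset_cache._sparse_materialize()  # threshold read from auth.get('sparse-limit')
    dataset_cache._sparse_materialize(sparse_limit=10000)  # explicit override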
Example #7
    @classmethod
    def setUpClass(cls):
        from sparcur.paths import BlackfynnCache, Path
        from sparcur.config import auth
        from sparcur.backends import BlackfynnRemote
        from sparcur.simple import pull, fetch_metadata_files, fetch_files
        test_datasets_real = auth.get_list('datasets-test')
        nosparse = [
            'N:dataset:bec4d335-9377-4863-9017-ecd01170f354',  # mbf headers
        ]
        test_datasets_real.extend(nosparse)
        slot = auth._pathit('{:user-cache-path}/sparcur/objects-temp')
        # FIXME slot needs to be handled transparently as an LRU cache
        # that has multiple folder levels and stores only by uuid
        # and probably lives in '{:user-cache-path}/sparcur/objects'
        slot = slot if slot.exists() else None
        cls.organization_id = auth.get('blackfynn-organization')
        cls.BlackfynnRemote = BlackfynnRemote._new(Path, BlackfynnCache)
        cls.BlackfynnRemote.init(cls.organization_id)
        cls.anchor = cls.BlackfynnRemote.smartAnchor(path_project_container)
        cls.anchor.local_data_dir_init(symlink_objects_to=slot)
        cls.project_path = cls.anchor.local
        list(cls.anchor.children)  # side effect to retrieve top level folders
        datasets = list(cls.project_path.children)
        cls.test_datasets = [
            d for d in datasets if d.cache_id in test_datasets_real
        ]
        [d.rmdir() for d in datasets
         if d.cache_id not in test_datasets_real]  # for sanity
        if not RealDataHelper._fetched:
            RealDataHelper._fetched = True  # if we fail we aren't going to try again
            [
                d._mark_sparse() for d in cls.test_datasets
                if d.cache_id not in nosparse
            ]  # keep pulls fastish
            pull.from_path_dataset_file_structure_all(cls.project_path,
                                                      paths=cls.test_datasets)
            fetch_metadata_files.main(cls.project_path)
            fetch_files.main(cls.project_path)
Example #8
        self.last_export_updated_datetime = None
        self.queued = False
        self.fetching = False
        self.exporting = False
        self.last_export_failed = None


defaults = {o.name: o.value if o.argcount else None
            for o in parse_defaults(clidoc)}
args = {**defaults, 'export': True, '--jobs': 1, 'schemas': False, 'protcur': False,
        '--no-network': True,  # XXX FIXME we need a way to fetch the data once and then reuse
        '--i-know-what-i-am-doing': True,
        'report': False, 'protocols': False}  # FIXME separate args for protcur export
options = Options(args, defaults)

project_id = auth.get('remote-organization')
path_source_dir = Path('~/files/sparc-datasets-test').expanduser().resolve()  # FIXME hardcoded  XXX resolve required to avoid mismatches
if not path_source_dir.exists():
    path_source_dir.mkdir(parents=True)

cel = Celery('sparcur-cron')

cel.conf.worker_hijack_root_logger = False
cel.conf.worker_prefetch_multiplier = 1

log.info(f'STATUS sparcur :id {project_id} :path {path_source_dir}')
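
# Hypothetical sketch, not part of the original module: with the dedicated
# 'cron' queue configured below, a task can be routed to it explicitly when
# it is enqueued; the task itself is an illustration only.
@cel.task
def heartbeat():  # hypothetical task
    log.info('STATUS heartbeat')

# heartbeat.apply_async(queue='cron')  # explicit queue routing at call time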

# FIXME a dedicated worker is needed for the cron queue
cel.conf.task_queues = (
    Queue('cron', Exchange('cron'), routing_key='task.cron',
          #max_priority=100,