Example #1
def test_serializable_locks(c, s, a, b):
    def f(x, lock=None):
        with lock:
            return x + 1

    # Note: the Lock objects need to be created inside a running cluster
    for lock in [HDF5_LOCK, Lock(), Lock('filename.nc'),
                 CombinedLock([HDF5_LOCK]),
                 CombinedLock([HDF5_LOCK, Lock('filename.nc')])]:

        futures = c.map(f, list(range(10)), lock=lock)
        yield c.gather(futures)

        lock2 = pickle.loads(pickle.dumps(lock))
        assert type(lock) == type(lock2)
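The c, s, a, b arguments are the client, scheduler and two workers supplied by distributed's gen_cluster test fixture, and the yield-based body is that harness's older generator style. A rough sketch of the same pickle round-trip in the modern async form (an illustration under those assumptions, not the original test):

import pickle

from distributed import Lock
from distributed.utils_test import gen_cluster


@gen_cluster(client=True)
async def test_lock_pickle_roundtrip(c, s, a, b):
    def inc(x, lock=None):
        with lock:
            return x + 1

    # Locks must be created while the cluster is running; they pickle by name.
    lock = Lock('filename.nc')
    futures = c.map(inc, list(range(5)), lock=lock)
    await c.gather(futures)

    lock2 = pickle.loads(pickle.dumps(lock))
    assert type(lock) == type(lock2)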
Example #2
def __getitem__(self, item):
    path = os.path.join(self.path, item)
    try:  # this will work if dask.distributed workers exist
        lock = Lock(path)
    except AttributeError:  # otherwise fall back to the InterProcessLock used by zarr
        lock = InterProcessLock(path)
    return lock
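This __getitem__-returns-a-lock pattern matches zarr's synchronizer protocol (compare zarr.sync.ProcessSynchronizer). A minimal sketch of a synchronizer built on it; the class name and the simplified fallback are assumptions for illustration:

import os

from dask.distributed import Lock


class DistributedSynchronizer:
    """One named dask.distributed lock per zarr array/chunk key."""

    def __init__(self, path):
        self.path = path

    def __getitem__(self, item):
        # zarr calls synchronizer[key] and enters the result as a context manager
        return Lock(os.path.join(self.path, item))

An instance would then be passed as the synchronizer= argument when opening a zarr store, so concurrent chunk writes from different workers serialize on the scheduler.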
Example #3
def lock_for_conflicts(conflicts, base_name="pangeo-forge"):

    try:
        global_client = get_client()
        is_distributed = True
    except ValueError:
        # Don't bother with locks if we are not in a distributed context
        # NOTE! This means we HAVE to use dask.distributed as our parallel execution environment.
        # This should be compatible with Prefect.
        is_distributed = False
    if is_distributed:
        locks = [Lock(f"{base_name}-{c}", global_client) for c in conflicts]
        for lock in locks:
            logger.debug(f"Acquiring lock {lock.name}...")
            lock.acquire()
            logger.debug(f"Acquired lock {lock.name}")
    else:
        logger.debug(f"Asked to lock {conflicts} but no Dask client found.")
    try:
        yield
    finally:
        if is_distributed:
            for lock in locks:
                lock.release()
                logger.debug(f"Released lock {lock.name}")
Example #4
def lock_for_conflicts(conflicts, base_name="pangeo-forge", timeout=None):
    """
    Parameters
    ----------
    timeout : int, optional
        The time to wait *for each lock*.
    """

    try:
        global_client = get_client()
        is_distributed = True
    except ValueError:
        # Don't bother with locks if we are not in a distributed context
        # NOTE! This means we HAVE to use dask.distributed as our parallel execution environment.
        # This should be compatible with Prefect.
        is_distributed = False
    if is_distributed:
        locks = [Lock(f"{base_name}-{c}", global_client) for c in conflicts]
        for lock in locks:
            logger.debug(f"Acquiring lock {lock.name}...")
            acquired = lock.acquire(timeout=timeout)
            if not acquired:
                logger.warning("Failed to acquire lock %s before timeout %s", lock.name, timeout)
                raise ValueError(f"Failed to acquire lock {lock.name} before timeout {timeout}")
            logger.debug(f"Acquired lock {lock.name}")
    else:
        logger.debug(f"Asked to lock {conflicts} but no Dask client found.")
    try:
        yield
    finally:
        if is_distributed:
            for lock in locks:
                lock.release()
                logger.debug(f"Released lock {lock.name}")
Example #5
def touch_random_unused_file(base_dir: Path, ext: Optional[str] = None) -> Path:
  assert base_dir.is_dir()
  if ext is None:
    ext = ""
  elif ext[0] != ".":
    ext = f".{ext}"
  lock = Lock(f"dir_lock:{base_dir.name}")
  while not lock.acquire(timeout=5):
    pass
  # THREADSAFE
  name = f"{get_random_ascii_str(10)}{ext}"
  path = base_dir.joinpath(name)
  while path.is_file():
    name = f"{get_random_ascii_str(10)}{ext}"
    path = base_dir.joinpath(name)
  path.touch()
  # End Threadsafe
  lock.release()
  return path
Example #6
def _flush_to_batches(redis_key, name):
    lock = Lock(redis_key)
    # TODO set timeout and handle
    if lock.acquire(timeout=1):
        try:
            processor = SchemaPreprocessor(name)
            batch_writer = BatchWriter(name)

            # Get the batch and remove the read range atomically
            with rd.pipeline() as pipe:
                pipe.multi()
                pipe.lrange(redis_key, 0, Config.batches.size - 1)
                pipe.ltrim(redis_key, Config.batches.size, -1)
                batch = pipe.execute()[0]

            batch_matrix = processor.json_blobs_to_matrix(batch)
            batch_writer.write_batch_matrix(batch_matrix)
        finally:
            lock.release()
    else:
        raise Reschedule()
Example #7
def get_scheduler_lock(scheduler, path_or_file=None):
    """ Get the appropriate lock for a certain situation based onthe dask
       scheduler used.

    See Also
    --------
    dask.utils.get_scheduler_lock
    """

    if scheduler == 'distributed':
        from dask.distributed import Lock
        return Lock(path_or_file)
    elif scheduler == 'multiprocessing':
        return multiprocessing.Lock()
    elif scheduler == 'threaded':
        from dask.utils import SerializableLock
        return SerializableLock()
    else:
        return threading.Lock()
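A short usage sketch, assuming the threaded scheduler and a hypothetical write step that must be serialized:

lock = get_scheduler_lock('threaded', path_or_file='out.nc')
with lock:                       # SerializableLock supports the context-manager protocol
    append_to_netcdf('out.nc')   # hypothetical write guarded by the lock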
Example #8
File: locks.py Project: visr/xarray
def _get_distributed_lock(key):
    from dask.distributed import Lock
    return Lock(key)
Example #9
        save_mats(count, runName, AA_mutation, nucleotide_mutation)
        print('DONE ! ')
        brake.set(False)
        return None

    ####### start the Sankoff algorithm here #######
    print('starting sankoff')
    # scale cluster
    # scatter the blank tree and row index for each process
    # remote_tree = client.scatter(tree)

    remote_index = client.scatter(IDindex)

    inq = Queue('inq')
    outq = Queue('outq')
    lock = Lock('x')

    stopiter = Variable(False)
    brake = Variable(True)


    saver_started = False
    workers_started = False

    # start workers
    for _ in range(NCORE * ncpu):
        w = client.submit(calculate_small_parsimony, inq=None, outq=None, stopiter=stopiter,
                          treefile=treefile, bootstrap_replicates=bootstrap_replicates,
                          matfile=alnfile + '.h5', row_index=remote_index, iolock=lock,
                          verbose=False)
        fire_and_forget(w)

    s = client.submit(collect_futures, queue=None, stopiter=stopiter, brake=brake,
                      runName=runName, nucleotides_only=False)
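A small self-contained sketch (not from the original project) of the dask.distributed coordination primitives the snippet above relies on: Queue, Variable, Lock and fire_and_forget. All names here are illustrative.

import time

from dask.distributed import Client, Lock, Queue, Variable, fire_and_forget


def consume_one(queue_name, flag_name, lock_name):
    item = Queue(queue_name).get()    # blocks until the driver pushes work
    with Lock(lock_name):             # serialize side effects, e.g. file I/O
        result = item + 1
    Variable(flag_name).set(True)     # signal completion back to the driver
    return result


if __name__ == '__main__':
    client = Client(processes=False)
    done = Variable('done')
    done.set(False)
    fire_and_forget(client.submit(consume_one, 'inq', 'done', 'iolock'))
    Queue('inq').put(41)
    while not done.get():             # poll the flag set by the worker task
        time.sleep(0.1)
    client.close()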
Example #10
def quick_proc(ds,
               opts,
               label_raw,
               label,
               client,
               reference=None,
               pxmask=None):

    reference = imread(opts.reference) if reference is None else reference
    pxmask = imread(opts.pxmask) if pxmask is None else pxmask

    stack = ds.stacks[label_raw]
    #     stk_del = ds.stacks['label_raw'].to_delayed().ravel()

    # get array names and shapes by correcting a single image (the last one)
    sample_res = _fast_correct(stack[-1:, ...].compute(scheduler='threading'),
                               opts=opts,
                               data_key=ds.data_pattern + '/' + label,
                               shots_grp=ds.shots_pattern,
                               peaks_grp=ds.data_pattern)

    #     print({k: v.dtype for k, v in sample_res.items()})

    # initialize file structure
    for (file, subset), grp in ds.shots.groupby(['file', 'subset']):
        with h5py.File(file, 'a') as fh:
            for pattern, data in sample_res.items():
                path = pattern.replace('%', subset)
                #                 print('Initializing', file, path)
                fh.require_dataset(path,
                                   shape=(len(grp), ) + data.shape[1:],
                                   dtype=data.dtype,
                                   chunks=(1, ) + data.shape[1:],
                                   compression=opts.compression)
            fh[ds.data_pattern.replace('%', subset)].attrs['signal'] = label

    # array of integers corresponding to the chunk number
    chunk_label = np.concatenate(
        [np.repeat(ii, cs) for ii, cs in enumerate(stack.chunks[0])])

    # delay objects returning the image and info dictionary
    cmp_del = [
        dask.delayed(_fast_correct)(raw_chk, opts)
        for raw_chk in ds.raw_counts.to_delayed().ravel()
    ]

    # file lock objects
    locks = {fn: Lock() for fn in ds.files}

    # make delay objects for writing results to file (= maximum side effects!)
    dels = []
    for chks, (cl, sht) in zip(cmp_del, ds.shots.groupby(chunk_label)):
        assert len(sht.drop_duplicates(['file', 'subset'])) == 1
        ii_to = sht.shot_in_subset.values
        dels.append(
            dask.delayed(nexus._save_single_chunk_multi)(
                chks,
                file=sht.file.values[0],
                subset=sht.subset.values[0],
                idcs=ii_to,
                lock=locks[sht.file.values[0]]))

    # random.shuffle(dels) # shuffling tasks to minimize concurrent file access
    chunk_info = client.compute(dels, sync=True)
    return pd.DataFrame(chunk_info,
                        columns=['file', 'subset', 'path', 'shot_in_subset'])
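One possible variant of the per-file lock dictionary above (an illustration, not the project's code): naming each dask.distributed Lock after the file it guards means any task that reconstructs the lock with that name on any worker contends on the same scheduler-side lock, instead of relying on the unnamed Lock objects travelling through the task graph.

from dask.distributed import Lock


def locks_by_file(filenames):
    # One named lock per output file; equal names resolve to the same lock.
    return {fn: Lock(f'write-{fn}') for fn in filenames}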