Ejemplo n.º 1
0
def Value(typecode_or_type, *args, lock=None):
    '''
    Return a synchronization wrapper for a Value
    '''
    raw = RawValue(typecode_or_type, *args)
    if lock is False:
        # Caller explicitly opted out of synchronization.
        return raw
    # True/None both mean "allocate a lock for me"; equality (not identity)
    # comparison is kept deliberately to match the original semantics.
    guard = RLock() if lock in (True, None) else lock
    if not hasattr(guard, 'acquire'):
        raise AttributeError("'%r' has no method 'acquire'" % guard)
    return synchronized(raw, guard)
Ejemplo n.º 2
0
    def map(self, fn, lazy=True, batched=False, num_workers=0):
        """
        Performs specific function on the dataset to transform and update every sample.

        Args:
            fn (callable): Transformations to be performed. It receives single
                sample as argument if batched is False. Else it receives all examples.
            lazy (bool, optional): If True, transformations would be delayed and
                performed on demand. Otherwise, transforms all samples at once. Note that
                if `fn` is stochastic, `lazy` should be True or you will get the same
                result on all epochs. Default: True.
            batched (bool, optional): If True, transformations would take all examples as
                input and return a collection of transformed examples. Note that if set
                True, `lazy` option would be ignored. Default: False.
            num_workers (int, optional): Number of processes for multiprocessing. If
                set to 0, it doesn't use multiprocessing. Note that if set to positive
                value, `lazy` option would be ignored (shards are always transformed
                eagerly so the results can be sent back). Default: 0.

        Returns:
            The dataset itself (transformed in place).
        """

        assert num_workers >= 0, "num_workers should be a non-negative value"
        if num_workers > 0:
            # NOTE(review): `initargs` is given without an `initializer`, so the
            # RLock is never delivered to the workers — presumably meant as a
            # shared tqdm write lock; confirm against the upstream source.
            with Pool(num_workers, initargs=(RLock(), )) as pool:

                # Runs inside a worker: restrict this dataset to one contiguous
                # shard, transform it eagerly, and return the shard.
                def map_shard(num_workers, index, fn, batched):
                    self.shard(num_shards=num_workers,
                               index=index,
                               contiguous=True)
                    self._map(fn=fn, lazy=False, batched=batched)
                    return self

                # One keyword bundle per worker/shard (rank == shard index).
                kwds_per_shard = [
                    dict(num_workers=num_workers,
                         index=rank,
                         fn=fn,
                         batched=batched) for rank in range(num_workers)
                ]
                results = [
                    pool.apply_async(map_shard, kwds=kwds)
                    for kwds in kwds_per_shard
                ]
                # Block until every shard has been transformed.
                transformed_shards = [r.get() for r in results]

                # Stitch the transformed shards back together in rank order so
                # the original sample order is preserved.
                self.new_data = []
                for i in range(num_workers):
                    self.new_data += transformed_shards[i].new_data

            return self
        else:
            # Single-process path: honour the caller's `lazy` setting.
            return self._map(fn, lazy=lazy, batched=batched)
Ejemplo n.º 3
0
def Array(typecode_or_type, size_or_initializer, **kwds):
    '''
    Return a synchronization wrapper for a RawArray
    '''
    lock = kwds.pop('lock', None)
    if kwds:
        raise ValueError('unrecognized keyword argument(s): %s' % kwds.keys())
    raw = RawArray(typecode_or_type, size_or_initializer)
    if lock is False:
        # Caller explicitly asked for the unsynchronized array.
        return raw
    # True/None both mean "allocate a lock"; equality comparison kept on
    # purpose to mirror the original behavior exactly.
    guard = RLock() if lock in (True, None) else lock
    if not hasattr(guard, 'acquire'):
        raise AttributeError("'%r' has no method 'acquire'" % guard)
    return synchronized(raw, guard)
Ejemplo n.º 4
0
    def filter(self, fn, num_workers=0):
        """
        Filters samples by the filter function and uses the filtered data to
        update this dataset.

        Args:
            fn (callable): A filter function that takes a sample as input and
                returns a boolean. Samples that return False would be discarded.
            num_workers (int, optional): Number of processes for multiprocessing.
                If set to 0 or 1, it doesn't use multiprocessing. Defaults to `0`.

        Returns:
            The dataset itself (filtered in place).
        """
        assert num_workers >= 0, "num_workers should be a non-negative value"
        if num_workers > 1:
            # Pre-split the dataset into one contiguous shard per worker so
            # each process filters an independent slice.
            shards = [
                self._shard(
                    num_shards=num_workers, index=index, contiguous=True)
                for index in range(num_workers)
            ]
            kwds_per_shard = [
                dict(
                    self=shards[rank], fn=fn) for rank in range(num_workers)
            ]
            # Context manager guarantees the pool is torn down even if a
            # worker raises (the original leaked worker processes in that
            # case); close()/join() inside keep the original orderly-shutdown
            # semantics on the success path.
            # NOTE(review): `initargs` without an `initializer` is a no-op —
            # presumably meant as a shared tqdm lock; confirm upstream.
            with Pool(num_workers, initargs=(RLock(), )) as pool:
                results = [
                    pool.apply_async(
                        self.__class__._filter, kwds=kwds)
                    for kwds in kwds_per_shard
                ]
                # Block until every shard has been filtered.
                transformed_shards = [r.get() for r in results]
                pool.close()
                pool.join()

            # Concatenate the filtered shards back in rank order so the
            # surviving samples keep their original relative order.
            self.new_data = []
            for i in range(num_workers):
                self.new_data += transformed_shards[i].new_data
            return self
        else:
            # Serial path: filter directly, no process overhead.
            return self._filter(fn)
Ejemplo n.º 5
0
 def __init__(self, obj, lock=None):
     """
     Wrap `obj`, guarding access with `lock`; a fresh RLock is created
     when no lock is supplied.
     """
     self._obj = obj
     # A falsy `lock` (i.e. None) gets a new re-entrant lock, exactly as
     # `lock or RLock()` did.
     self._lock = lock if lock else RLock()
     # Expose the lock's bound methods directly on the wrapper.
     self.acquire = self._lock.acquire
     self.release = self._lock.release