Example #1
def compute_cov_cpu(lambda0, coords, n_procs):
    """ Massively parallelized covariance computation (on CPU).

    """
    inv_lambda_2 = -np.sqrt(3) / lambda0

    n_cells = coords.shape[0]
    n_dim_coords = coords.shape[1]
    cov_shape = (n_cells, n_cells)
    coords_shape = (n_cells, n_dim_coords)
    cov_shared_buffer = RawArray('d', n_cells * n_cells)
    # Wrap as a numpy array so we can easily manipulate its data.
    cov_np = np.frombuffer(cov_shared_buffer).reshape(cov_shape)
    # Copy data to our shared array.
    np.copyto(cov_np, np.zeros(cov_shape))

    coords_shared_buffer = RawArray('d', n_cells * n_dim_coords)
    # Wrap as a numpy array so we can easily manipulate its data.
    coords_np = np.frombuffer(coords_shared_buffer).reshape(coords_shape)
    # Copy data to our shared array.
    np.copyto(coords_np, coords)

    # Start the process pool and do the computation.
    # Here we pass the shared buffers and their shapes to the
    # initializer of each worker.
    # (Because the shapes are not shared variables, they will be
    # copied to each child process.)
    with Pool(processes=n_procs,
              initializer=init_worker,
              initargs=(cov_shared_buffer, cov_shape, coords_shared_buffer,
                        coords_shape, inv_lambda_2)) as pool:
        result = pool.map(_worker_func, range(coords_shape[0]))

    return cov_np
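The snippet hands `init_worker` and `_worker_func` to the pool but does not show them. A minimal sketch of what they might look like, assuming each task fills one row of the shared covariance matrix; the Matérn-3/2 form is a guess based on the `-sqrt(3)/lambda0` factor, the real kernel is not shown:

# Hypothetical worker pair for the Pool above (not in the original source).
_worker_state = {}

def init_worker(cov_buf, cov_shape, coords_buf, coords_shape, inv_lambda_2):
    # Re-wrap the shared buffers once inside each child process.
    _worker_state['cov'] = np.frombuffer(cov_buf).reshape(cov_shape)
    _worker_state['coords'] = np.frombuffer(coords_buf).reshape(coords_shape)
    _worker_state['inv_lambda_2'] = inv_lambda_2

def _worker_func(i):
    # Fill row i of the covariance matrix in place.
    coords = _worker_state['coords']
    il2 = _worker_state['inv_lambda_2']
    d = np.linalg.norm(coords - coords[i], axis=1)
    # Assumed Matern-3/2 kernel, consistent with inv_lambda_2 = -sqrt(3)/lambda0.
    _worker_state['cov'][i, :] = (1.0 - il2 * d) * np.exp(il2 * d)
    return i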
Example #2
    def _set_gym_matrices(self):
        # set the action space
        num_actions = self.topo.get_num_hosts()
        min_bw = 10000.0 / float(self.topo.conf["max_capacity"])
        self.action_min = np.empty(num_actions)
        self.action_min.fill(min_bw)
        self.action_max = np.empty(num_actions)
        self.action_max.fill(1.0)
        #        self.action_space = spaces.Box(
        #            low=action_min, high=action_max, dtype=np.float32)
        # Initialize the action arrays shared with the control manager
        # Qdiscs cannot go beyond the uint32 rate limit, which is about 4 Gbps
        tx_rate = RawArray(ctypes.c_uint32, num_actions)
        self.tx_rate = dc_utils.shmem_to_nparray(tx_rate, np.float32)
        active_rate = RawArray(ctypes.c_uint32, num_actions)
        self.active_rate = dc_utils.shmem_to_nparray(active_rate, np.float32)
        log.info("%s Setting action space", (self.short_id))
        log.info("from %s", self.action_min)
        log.info("to %s", self.action_max)

        # set the observation space
        num_ports = self.topo.get_num_sw_ports()
        num_features = len(self.conf["state_model"])
        if self.conf["collect_flows"]:
            num_features += num_actions * 2
        obs_min = np.empty(num_ports * num_features + num_actions)
        obs_min.fill(-np.inf)
        obs_max = np.empty(num_ports * num_features + num_actions)
        obs_max.fill(np.inf)
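`dc_utils.shmem_to_nparray` is not defined anywhere in this listing (it also appears in Examples #7 and #14). Judging from the call sites, it is plausibly a thin zero-copy wrapper along these lines:

import numpy as np

def shmem_to_nparray(shmem_array, dtype):
    # Reinterpret the shared ctypes buffer as a flat numpy array of `dtype`.
    # Call sites above rely on same-size reinterpretation, e.g. a c_uint32
    # buffer viewed as float32 (both 4 bytes per element).
    return np.frombuffer(shmem_array, dtype=dtype)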
Example #3
    def __init__(self, init_dict=None):
        """Create a shared memory version of each element of the initial
        dictionary. Creates an empty array otherwise, which will extend
        automatically when keys are added.

        Each different type (all supported types listed in the `types` array
        above) has its own array. For each key we store an index into the
        appropriate array as well as the type of value stored for that key.
        """
        # idx is dict of {key: (array_idx, value_type)}
        self.idx = {}
        # arrays is dict of {value_type: array_of_ctype}
        self.arrays = {}
        if init_dict:
            sizes = {typ: 0 for typ in self.types.keys()}
            for v in init_dict.values():
                if type(v) not in sizes:
                    raise TypeError('SharedTable does not support values of ' +
                                    'type ' + str(type(v)))
                sizes[type(v)] += 1
            for typ, sz in sizes.items():
                self.arrays[typ] = RawArray(self.types[typ], sz)
            idxs = {typ: 0 for typ in self.types.keys()}
            for k, v in init_dict.items():
                val_type = type(v)
                self.idx[k] = (idxs[val_type], val_type)
                if val_type == str:
                    v = sys.intern(v)
                self.arrays[val_type][idxs[val_type]] = v
                idxs[val_type] += 1
        # initialize any needed empty arrays
        for typ, ctyp in self.types.items():
            if typ not in self.arrays:
                self.arrays[typ] = RawArray(ctyp, 0)
        self.lock = Lock()
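A minimal usage sketch for `SharedTable`, assuming the fork start method (so the `RawArray`s are inherited by children) and a `__getitem__` counterpart to the `__setitem__` shown in Example #18:

from multiprocessing import Process

table = SharedTable({'epoch': 0, 'loss': 0.25})

def child():
    # The RawArray contents are visible here after fork.
    print(table['loss'])  # assumes a __getitem__ (not shown in this listing)

p = Process(target=child)
p.start()
p.join()
table['epoch'] = 1  # updates the shared int array in place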
Example #4
    def start_process(self, max_pulses=None):
        if self.__process.is_alive():
            print('Process is already running')
            return

        if max_pulses is None:
            max_pulses = self.__config['DAQ']['BufferLength']

        print('Starting process...')

        if not (self.is_valid):
            print(
                'You have to start the process manually by calling <object>.start_process()!'
            )
            return
        self._synchpulsetimes = RawArray('d', [-1] * max_pulses)
        self._buttonstates = RawArray('b', [0] * self.number_of_buttons)
        self._buttonpresstimes = [
            RawArray('d', [-1] * max_pulses)
            for n in range(0, self.number_of_buttons)
        ]
        self._select_buttons = RawArray(
            'b', [1] * self.number_of_buttons)  # record only selected buttons
        self._button_record_period = RawArray(
            'd', [0, inf])  # record buttons only in this period
        self.__readout_time = [
            self.__readout_time[0]
        ] + [self.__readout_time[1]] * self.number_of_buttons
        self.__process = Process(target=self._run)
        self.__process.start()
        # Busy-wait until the child process reports alive.
        while not self.is_alive:
            pass
        print('[{:.3f}s] - Process is running'.format(self.clock))
Example #5
def create_multiproc_matrix(state_basis, state_occup, vals, vmat, N):
    global state_basis_sh
    global state_occup_sh
    global vals_sh
    global vmat_sh
    global matrix_sh
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        state_basis_ct = np.ctypeslib.as_ctypes(state_basis)
        state_occup_ct = np.ctypeslib.as_ctypes(state_occup)
        vals_ct = np.ctypeslib.as_ctypes(vals)
        vmat_ct = np.ctypeslib.as_ctypes(vmat.view(dtype='float64'))
        state_basis_sh = RawArray(state_basis_ct._type_, state_basis_ct)
        state_occup_sh = RawArray(state_occup_ct._type_, state_occup_ct)
        vals_sh = RawArray(vals_ct._type_, vals_ct)
        vmat_sh = RawArray(vmat_ct._type_, vmat_ct)

        dim = len(state_basis)
        block_size = max(1, dim // 20)
        matrix = np.zeros((dim, dim), dtype=complex)
        matrix_ct = np.ctypeslib.as_ctypes(matrix.view(dtype='float64'))
        matrix_sh = RawArray(matrix_ct._type_, matrix_ct)

        func = partial(fill_per_window_sh, N=N, block_size=block_size)
        idxs = [(i, min(i + block_size, dim))
                for i in range(0, dim, block_size)]

        p = Pool(processes=10)
        res = p.map(func, idxs)
        p.close()
        p.join()
        np.copyto(matrix,
                  np.ctypeslib.as_array(matrix_sh).view(dtype='complex128'))

    return matrix
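`fill_per_window_sh` is not shown. A heavily simplified sketch of its likely shape: each call rebuilds the complex view over the shared `matrix_sh` and fills the rows of one window. The actual matrix elements depend on `state_basis`, `vals`, and `vmat` in a model-specific way, so `compute_element` below is only a placeholder:

def fill_per_window_sh(window, N, block_size):
    # Hypothetical worker: fill rows [i0, i1) of the shared matrix.
    i0, i1 = window
    matrix = np.ctypeslib.as_array(matrix_sh).view(dtype='complex128')
    for i in range(i0, i1):
        for j in range(matrix.shape[1]):
            matrix[i, j] = compute_element(i, j, N)  # placeholder, not real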
Example #6
    def begin_compute(cls):
        """Starts the Mandelbrot set computation."""
        no_val = "No {} value set!"
        essential_values = [
            "ITERATIONS",
            "PIXWIDTH",
            "PIXHEIGHT",
            "STEP",
            "RMIN",
            "RMAX",
            "IMIN",
            "IMAX",
            "PALLETE",
        ]

        for v in essential_values:
            if getattr(cls, v, None) is None:
                raise MandelProcException(no_val.format(v))
        
        x_coords = RawArray('I', np.arange(cls.PIXWIDTH, dtype='I'))
        pixel_data = RawArray('B', 3*cls.PIXWIDTH*cls.PIXHEIGHT)
        row_flags = RawArray('B', cls.PIXHEIGHT)
        np_array = np.frombuffer(pixel_data, dtype='B')
        np_array = np_array.reshape(cls.PIXHEIGHT, 3*cls.PIXWIDTH)
        procs = [cls(i, np_array, row_flags, x_coords) for i in range(cls.STEP)]
        for p in procs:
            p.start()
        return procs, np_array, row_flags
Example #7
def init_rate_control(ctrl_iface, rate):
    # Initialize the action array shared with the control manager
    tx_rate = RawArray(ctypes.c_uint32, 1)
    tx_rate = dc_utils.shmem_to_nparray(tx_rate, np.float32)
    tx_rate.fill(rate)
    bw_proc = BandwidthController({"test": ctrl_iface}, tx_rate, tx_rate, rate)
    bw_proc.start()
    return tx_rate, bw_proc
Example #8
    def init_shared(self, obs_shape):
        shape = (self.batch_size, ) + obs_shape

        state = np.zeros(shape, dtype=np.float32)
        state = RawArray(c_float, state.reshape(-1))
        state = np.frombuffer(state, c_float).reshape(shape)

        return state
Example #9
 def getRawArrays(self):
     returnMatrix = RawArray(
         'd', self.returnMatrix.reshape(np.prod(self.shapes['return'])))
     excessReturnMatrix = RawArray(
         'd', self.excessReturn.reshape(np.prod(self.shapes['return'])))
     excessMarketReturn = RawArray(
         'd',
         self.excessMarketReturn.reshape(np.prod(self.shapes['market'])))
     return returnMatrix, excessReturnMatrix, excessMarketReturn
Example #10
 def final_init(self):
     self.buffer_size = self.descriptor.num_points()*self.descriptor.buffer_mult_factor
     # logger.info(f"{self.start_connector.parent}:{self.start_connector} to {self.end_connector.parent}:{self.end_connector} buffer of size {self.buffer_size}")
     if self.buffer_size > 50e6:
         logger.debug(f"Limiting buffer size of {self} to 50 Million Points")
         self.buffer_size = 50e6
     self.buff_shared_re = RawArray(ctypes.c_double, int(self.buffer_size))
     self.buff_shared_im = RawArray(ctypes.c_double, int(self.buffer_size))
     self.re_np = np.frombuffer(self.buff_shared_re, dtype=np.float64)
     self.im_np = np.frombuffer(self.buff_shared_im, dtype=np.float64)
Example #11
 def raw_array_from_ndarray(arr):
     if arr.dtype == np.float64:
         raw = RawArray('d', arr.size)
         _tempnparray = np.frombuffer(raw, np.float64).reshape(arr.shape)
     elif arr.dtype == np.int64 or arr.dtype == np.int32:
         # Note: int64 input is downcast to a 32-bit shared array here.
         raw = RawArray('i', arr.size)
         _tempnparray = np.frombuffer(raw, np.int32).reshape(arr.shape)
     else:
         raise TypeError(f"Unknown numpy dtype: {arr.dtype}")
     np.copyto(_tempnparray, arr)
     return raw
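A usage sketch for `raw_array_from_ndarray`, assuming the fork start method so the raw buffer is inherited rather than pickled (ctypes shared arrays cannot be pickled):

import numpy as np
from multiprocessing import Process

src = np.arange(6, dtype=np.float64).reshape(2, 3)
raw = raw_array_from_ndarray(src)

def child():
    # Re-wrap the inherited buffer without copying; writes here are
    # visible to the parent.
    view = np.frombuffer(raw, np.float64).reshape(2, 3)
    view *= 2

p = Process(target=child)
p.start()
p.join()
print(np.frombuffer(raw, np.float64).reshape(2, 3))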
Example #12
def main(file):
    predicted_map = np.load(
        os.path.join(raw_predictions, "predicted_map_{}.npy".format(file)))

    points = np.loadtxt(os.path.join(in_path, "{}.xyz".format(file)))
    corrected_maps = np.zeros_like(predicted_map)
    n_points = len(predicted_map)
    predicted_neighborhood_indices = np.load(
        os.path.join(raw_predictions,
                     "predicted_neighborhood_indices_{}.npy".format(file)))
    full_errors = []
    BATCH_SIZE = 64  #512#16
    shared_predicted_map = RawArray('d', 10000 * (n_nearest_neighbors + 1) * 3)
    shared_predicted_map = np.frombuffer(shared_predicted_map,
                                         dtype=np.float64).reshape(
                                             10000, (n_nearest_neighbors + 1),
                                             3)
    np.copyto(shared_predicted_map, predicted_map)

    shared_predicted_neighborhood_indices = RawArray(
        'i', 10000 * (n_nearest_neighbors + 1))
    shared_predicted_neighborhood_indices = np.frombuffer(
        shared_predicted_neighborhood_indices,
        dtype=np.int32).reshape(10000, (n_nearest_neighbors + 1))
    np.copyto(shared_predicted_neighborhood_indices,
              predicted_neighborhood_indices)

    manager = multiprocessing.Manager()
    return_dict = manager.dict()

    # Promote to module scope so the Pool can pickle the function by name.
    global align_patch_func

    def align_patch_func(shared_predicted_map,
                         shared_predicted_neighborhood_indices, i):
        return align_patch(shared_predicted_map,
                           shared_predicted_neighborhood_indices, i)

    jobs = []
    start = time.time()
    print('start:', file)
    with multiprocessing.Pool(64) as pool:
        corrected_maps = pool.map(
            functools.partial(align_patch_func, shared_predicted_map,
                              shared_predicted_neighborhood_indices),
            range(n_points))
        corrected_maps = np.array(corrected_maps)
        np.save(os.path.join(res_path, 'corrected_maps_{}.npy'.format(file)),
                corrected_maps)
    end = time.time()
Example #13
  def __init__(self, limit, item_shape, n_cpu=1):
    """
    The replay buffer object. Stores everything in float32.

    :param limit: (int) the max number of transitions to store
    :param item_shape: a list of tuples of (str) item name and (tuple) the shape for item
      Ex: [("observations", env.observation_space.shape),\
          ("actions",env.action_space.shape),\
          ("rewards", (1,)),\
          ("dones", (1,))]
    """
    self.limit = limit

    global BUFF
    BUFF = AttrDict()
    self.BUFF = BUFF # a global object that has shared RawArray-based RingBuffers.

    BUFF.items = []

    # item buffers
    for name, shape in item_shape:
      BUFF.items.append('buffer_' + name)
      BUFF['raw_' + name] = RawArray('f', int(np.prod((limit, ) + shape)))
      BUFF['np_' + name] =\
        np.frombuffer(BUFF['raw_' + name], dtype=np.float32).reshape((limit, ) + shape)
      BUFF['buffer_' + name] = RingBuffer(limit, shape=shape, data=BUFF['np_' + name])

    # special buffers
    # Note: 'd' allocates 8-byte slots, which are reinterpreted below as
    # int64 (same element size, different type).
    BUFF.raw_tidx = RawArray('d', limit)
    BUFF.np_tidx = np.frombuffer(BUFF.raw_tidx, dtype=np.int64)
    BUFF.buffer_tidx = RingBuffer(limit, shape=(), dtype=np.int64, data=BUFF.np_tidx)

    BUFF.raw_tleft = RawArray('d', limit)
    BUFF.np_tleft = np.frombuffer(BUFF.raw_tleft, dtype=np.int64)
    BUFF.buffer_tleft = RingBuffer(limit, shape=(), dtype=np.int64, data=BUFF.np_tleft)

    if 'buffer_bg' in BUFF: # is this a successful trajectory?
      BUFF.raw_success = RawArray('f', limit)
      BUFF.np_success = np.frombuffer(BUFF.raw_success, dtype=np.float32)
      BUFF.buffer_success = RingBuffer(limit, shape=(), dtype=np.float32, data=BUFF.np_success)

    self.trajectories = OrderedDict() # a centralized dict of trajectory_id --> trajectory_idxs
    self.total_trajectory_len = 0
    self.current_trajectory = 0

    self.pool = None
    self.n_cpu = n_cpu
    if n_cpu > 1:
      self.pool = mp.Pool(n_cpu, initializer=worker_init, initargs=(BUFF,))
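`worker_init` is not shown; given `initargs=(BUFF,)`, it plausibly just re-registers the shared container as a global in each pool worker:

def worker_init(buff):
    # Re-register the shared buffer container inside each worker
    # (fork start method assumed; the RawArrays are inherited).
    global BUFF
    BUFF = buff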
Example #14
 def _init_stats_matrices(self, num_ports, num_hosts):
     # Set up the shared stats matrix
     stats_arr_len = num_ports * len(self.stats_dict)
     mp_stats = RawArray(c_ulong, stats_arr_len)
     np_stats = dc_utils.shmem_to_nparray(mp_stats, np.float64)
     self.stats = np_stats.reshape((len(self.stats_dict), num_ports))
     # Set up the shared flow matrix
     if (self.collect_flows):
         flow_arr_len = num_ports * num_hosts * 2
         mp_flows = RawArray(c_ubyte, flow_arr_len)
         np_flows = dc_utils.shmem_to_nparray(mp_flows, np.uint8)
         self.flow_stats = np_flows.reshape((num_ports, 2, num_hosts))
     # Save the initialized stats matrix to compute deltas
     self.prev_stats = self.stats.copy()
     self.deltas = np.zeros(shape=(len(self.stats_dict), num_ports))
Example #15
File: server.py Project: nk53/dht
    def __init__(self, clients, server_host, config):
        # setup connection info
        self.hostname = socket.gethostname()
        self.clients = clients
        self.num_clients = len(clients)
        self.server_settings = (server_host, int(config['port']))
        self.backlog_size = int(config['backlog'])
        self.max_retries = int(config['max_retries'])

        # setup logging info
        self.verbose = config['verbose'].upper()[0] == 'T'
        outfilename = os.path.join(
                os.getenv("OUTPUT_DIR"),
                self.hostname + "_server.out")
        self.outfile = open(outfilename, 'w')

        # setup table
        num_keys = int(config['table_size'])
        # synchronized by means of an indicator array
        self.table = RawArray(c_int, num_keys)

        # setup worker synchronization
        self.num_workers = int(config['server_threads'])
        self.request_queue = Queue() # multi-producer/multi-consumer queues
        self.sock_locks = [Lock() for i in range(self.num_clients)]
        # a dumb globally-locked array keeps track of pending PUTs
        self.pending = Array(c_int, num_keys)

        # setup multiprocessing info
        super(Server, self).__init__(
            group=None, target=None,
            name="{} (server)".format(self.hostname))
Example #16
 def __init__(self, max_size):
     from multiprocessing import Lock, RawArray, RawValue
     self._max_size = max_size
     self._array = RawArray('c', max_size)
     self._pos = RawValue('L')
     self._size = RawValue('L')
     self._locks = Lock(), Lock(), Lock()
Example #17
    def __init__(self,
                 host='localhost',
                 port=8001,
                 model='atari',
                 observation_shape=None,
                 n_stack_frames=4,
                 wait_interval_msec=30,
                 **kwargs):
        super(AsyncAgent, self).__init__(**kwargs)

        self._wait_interval_sec = wait_interval_msec / 1000.0

        self._observation_shape = observation_shape
        n_bytes = int(np.prod(observation_shape))

        self._n_stack_frames = n_stack_frames

        # Zero-initialized; initializing c_ubyte from range(n_bytes) would
        # overflow once values exceed 255.
        self._state_buffers = [RawArray(c_ubyte, n_bytes)
                               for _ in range(self._n_stack_frames)]
        self._n_frames = Value(c_long, 0)

        self._action_buffer = Value(c_long, 1)
        self._stop_signal = Value(c_bool, 0)
        self._state_lock = mpLock()

        self._trtis_client = TrtisClient(host=host,
                                         port=port,
                                         model_name=model)
Example #18
 def __setitem__(self, key, value):
     """If key is in table, update it. Otherwise, extend the array to make
     room. This uses additive resizing not multiplicative, since the number
     of keys is not likely to change frequently during a run, so do not abuse
     it.
     Raises an error if you try to change the type of the value stored for
     that key--if you need to do this, you must delete the key first.
     """
     val_type = type(value)
     if val_type not in self.types:
         raise TypeError('SharedTable does not support type ' +
                         str(type(value)))
     if val_type == str:
         value = sys.intern(value)
     if key in self.idx:
         idx, typ = self.idx[key]
         if typ != val_type:
             raise TypeError(
                 ('Cannot change stored type for {key} from ' +
                  '{v1} to {v2}. You need to del the key first' +
                  ' if you need to change value types.').format(
                      key=key, v1=typ, v2=val_type))
         self.arrays[typ][idx] = value
     else:
         old_array = self.arrays[val_type]
         ctyp = self.types[val_type]
         new_array = RawArray(ctyp, len(old_array) + 1)
         for i in range(len(old_array)):
             new_array[i] = old_array[i]
         new_array[-1] = value
         self.arrays[val_type] = new_array
         self.idx[key] = (len(new_array) - 1, val_type)
Example #19
    def __init__(self):
        print("SimulationCommunicator object created")

        # values/results produced by the simulation
        self.ball_x = RawValue('f', 0.0)
        self.ball_y = RawValue('f', 0.0)

        self.servo_x = RawValue('i', 0)
        self.servo_y = RawValue('i', 0)

        self.corner_tl_x = RawValue('f', 0.0)
        self.corner_tl_y = RawValue('f', 0.0)
        self.corner_tr_x = RawValue('f', 0.0)
        self.corner_tr_y = RawValue('f', 0.0)
        self.corner_br_x = RawValue('f', 0.0)
        self.corner_br_y = RawValue('f', 0.0)
        self.corner_bl_x = RawValue('f', 0.0)
        self.corner_bl_y = RawValue('f', 0.0)

        self.cameraFrame = RawArray('i', 3 * 256**2)

        # mutable values
        self.servo_actual_pos = [0, 0]  # current servo position
        self.servo_target_pos = [0, 0]  # target servo position

        self.refreshDeltaTime = 1 / 60
        self.frameReadTargetDelta = 1 / 40
        self.frameReadLastTime = 0.0
        self.capturedFrame = np.zeros((256, 256, 3))
Example #20
def make_raw(array):
    """Create a multiprocessing-ready RawArray; creates a copy of ``array``.

    Parameters
    ----------
    array : np.array
        Array to use. Is copied into a RawArray.

    Returns
    -------
    (np.array, RawArray)
        [0] Numpy array created from raw_array. Do not share with other
            processes.
        [1] Raw Array; safe for sharing.

    References
    ----------
    https://research.wmz.ninja/articles/2018/03/
    on-sharing-large-arrays-when-using-pythons-multiprocessing.html
    """

    raw_array = RawArray('f', int(np.prod(array.shape)))
    array_np = np.frombuffer(raw_array, dtype=np.float32).reshape(array.shape)
    np.copyto(array_np, array)

    return array_np, raw_array
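A short usage sketch for `make_raw`:

import numpy as np

data = np.random.rand(4, 4).astype(np.float32)
local_view, shared = make_raw(data)
# Hand `shared` to child processes (e.g. via a Pool initializer) and
# re-wrap it there; keep `local_view` for zero-copy access in the parent.
child_view = np.frombuffer(shared, dtype=np.float32).reshape(data.shape)
assert np.array_equal(local_view, child_view)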
Example #21
 def __init__(self, entry):
     self.entry = entry
     self.queue = Queue()
     self.raw = RawArray('B', self.MAX_SHM_SIZE)
     self.proc = Process(target=self.p_main, args=[], daemon=True)
     self.proc.start()
     self.joint = Thread(target=self.proc.join, args=[], daemon=True)
     self.joint.start()
Example #22
 def _setup(self):
     self._shared_mem = RawArray('c', self._cap)
     self._base = ctypes.addressof(self._shared_mem)
     self._locker.acquire()
     try:
         self._allocator = PageAllocator(self._shared_mem, self._page_num,
                                         self._page_size)
     finally:
         self._locker.release()
Example #23
    def create_shared_array(ctype, array_shape):
        '''
        Returns a multiprocessing.RawArray and its Numpy wrapper.
        '''
        numel = int(np.prod(array_shape))
        shared_array = RawArray(ctype, numel)
        shared_array_wrapper = np.frombuffer(shared_array, dtype=ctype).reshape(array_shape)

        return shared_array, shared_array_wrapper
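A usage sketch for `create_shared_array` (it appears at method indentation in the listing, so in the original project access may go through its class; here it is assumed to be reachable as a plain function):

import ctypes
import numpy as np

shared, wrapper = create_shared_array(ctypes.c_double, (3, 4))
wrapper[:] = 1.0  # writes go straight to the shared buffer
# Pass `shared` to workers (through inheritance or a Pool initializer)
# and rebuild the wrapper there with the same dtype and shape.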
Example #24
def genFakeData():
    # Share
    shape = (16, 1000)
    raw = RawArray('d', shape[0] * shape[1])
    # Don't share
    dat = np.zeros(shape, dtype=np.float64)
    datBuf = np.frombuffer(raw, dtype=np.float64).reshape(shape)
    np.copyto(datBuf, dat)
    return (datBuf, raw, shape)
Example #25
def ndarray_to_shmem(array):
    """ Converts a numpy.ndarray to a multiprocessing.RawArray object.

        The memory is copied, and the array is flattened.
    """
    arr = np.ascontiguousarray(array).reshape((-1, ))
    data = RawArray(_numpy_to_ctypes[array.dtype.type], arr.size)
    # Copy arr.nbytes bytes; len(array.data) counts elements, not bytes.
    ctypes.memmove(data, arr.ctypes.data, arr.nbytes)
    return data
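The `_numpy_to_ctypes` mapping is referenced but never shown. A plausible minimal version, plus the zero-copy inverse for completeness (both are assumptions, not the original project's code):

import ctypes
import numpy as np

# Assumed dtype-to-ctypes mapping; extend as needed.
_numpy_to_ctypes = {
    np.float64: ctypes.c_double,
    np.float32: ctypes.c_float,
    np.int64: ctypes.c_int64,
    np.int32: ctypes.c_int32,
}

def shmem_as_ndarray(data, dtype, shape):
    # Zero-copy inverse: wrap the RawArray back into an ndarray.
    return np.frombuffer(data, dtype=dtype).reshape(shape)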
Example #26
    def run(self):
        if self.extractor_name == 'Neighborhood':
            self.extractor = Neighborhood(**self.extractor_kwargs)
        else:
            self.extractor = Region(**self.extractor_kwargs)
        self.extractor.preprocess(self.image)
        # Build a shared version of the extractor's stack
        extractor_def = {'name': self.extractor_name, 'shape': self.extractor.stack.shape, 'kwargs': self.extractor_kwargs}
        mem_size = 1
        for s in self.extractor.stack.shape:
            mem_size *= s
        self.shared_stack = RawArray(ctypes.c_float, mem_size)
        stack_shape = self.extractor.stack.shape
        tmp = np.frombuffer(self.shared_stack, dtype=np.float32).reshape(stack_shape)
        tmp[:,:,:] = self.extractor.stack[:,:,:]
        tmp = None
        # Fetch dimensions of vector for a simple location
        vector = self.extractor.extract_at(0, 0)
        # Calculate the number of columns, taking the stride into consideration
        cols = self.image.shape[1] // self.stride
        shape = ((cols, ) + vector.shape)
        mem_size = 1
        # Calculate the size of the memory needed for one row
        for s in shape:
            mem_size *= s
        # Determine the max number of processes we can spawn
        if tf.test.is_gpu_available(cuda_only=False):
            mem_available = psutil.virtual_memory().available / 1024 / 1024 / 2
            row_size = (mem_size * 4) / 1024 / 1024
            max_children = int(mem_available / row_size)
        else:
            max_children = cpu_count() - 2
        if max_children <= 0:
            max_children = 1
        elif max_children > 250:
            max_children = 250
        print(max_children)
        for i in range(max_children):
            state = Value('i', -1)
            mem = Array(ctypes.c_float, mem_size)
            array = np.frombuffer(mem.get_obj(), dtype=np.float32).reshape(shape)
            rows = [x for x in range(i * self.stride, self.image.shape[0], max_children * self.stride)]
            if len(rows) > 0:
                p = Process(target=extract, args=(extractor_def, self.shared_stack, rows, mem, state, shape))
                self.processes.append(p)
                self.mem.append(mem)
                self.states.append(state)
                self.arrays.append(array)
        
        self.extractor = None
        self.ready = True
        for p in self.processes:
            p.start()

        for p in self.processes:
            p.join()
Example #27
 def _setup(self):
     self._shared_mem = RawArray('c', self._cap)
     self._base = np.frombuffer(
         self._shared_mem, dtype='uint8', count=self._cap)
     self._locker.acquire()
     try:
         self._allocator = PageAllocator(self._base, self._total_pages,
                                         self._page_size)
     finally:
         self._locker.release()
Example #28
    def __init__(self, comm_info):
        """Initialize shared memory."""
        super(ShareByRawArray, self).__init__()

        self.size_shared_mem = comm_info.get("size", 100000000)
        self.agent_num = comm_info.get("agent_num", 4)

        self.control_q = Queue()
        self.mem = RawArray(c_ubyte, self.size_shared_mem)
        self.size_mem_agent = int(self.size_shared_mem / self.agent_num)
Example #29
    def optimize(self, maxiter=1000, perdiff=0.1):
        """
        Optimizes the posterior distribution given the data. The
        algorithm terminates when either the maximum number of
        iterations is reached or the percent difference in the
        posterior is less than perdiff.
        """

        #if self.gpu:
        #    self.gdata = to_gpu(np.asarray(self.data, dtype=np.float32))
        #    self.g_ones = to_gpu(np.ones((self.ncomp,1), dtype=np.float32))
        #    self.g_ones_long = to_gpu(np.ones((self.nobs, 1), dtype=np.float32))

        if self.parallel:
            from multiprocessing import RawArray
            self.shared_dens_mem = RawArray('d', self.nobs * self.ncomp)
            self.shared_dens = np.frombuffer(self.shared_dens_mem).reshape(
                self.nobs, self.ncomp)
            for w in self.workers:
                w.set_dens(self.shared_dens_mem)
                w.start()

        # start threads
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)

        self.expected_labels()
        ll_2 = self.log_posterior()
        ll_1 = 1
        it = 0
        if self.verbose:
            if self.gpu:
                print("starting GPU enabled BEM")
            else:
                print("starting BEM")
        while np.abs(ll_1 - ll_2) > 0.01 * perdiff and it < maxiter:
            if isinstance(self.verbose,
                          int) and self.verbose and not isinstance(
                              self.verbose, bool):
                if it % self.verbose == 0:
                    print("%d: %f" % (it, ll_2))
            it += 1

            self.maximize_mu()
            self.maximize_Sigma()
            self.maximize_weights()
            self.expected_alpha()
            self.expected_labels()
            ll_1 = ll_2
            ll_2 = self.log_posterior()
        if self.gpu:
            kill_GPUWorkers(self.gpu_workers)
        if self.parallel:
            for i in range(self.num_cores):
                self.work_queue[i].put(None)
Example #30
def gen_time_results(mat_size, core_list, no_runs):
    if __name__ == '__main__':
        for _ in range(no_runs):
            mat_shape = (mat_size, mat_size)
            data_A = np.random.rand(*mat_shape).astype(np.float32)
            data_B = np.random.rand(*mat_shape).astype(np.float32)
            A = RawArray('f', mat_shape[0] * mat_shape[1])
            B = RawArray('f', mat_shape[0] * mat_shape[1])
            A_np = np.frombuffer(A, dtype=np.float32).reshape(mat_shape)
            B_np = np.frombuffer(B, dtype=np.float32).reshape(mat_shape)
            np.copyto(A_np, data_A)
            np.copyto(B_np, data_B)
            for no_cores in core_list:
                print(no_cores)
                #Assuming the matrix is of size 2^n for integer n, we take log2 to find n
                power = np.log2(no_cores) / 2
                #Represents the number of partitions to compute in the result matrix C
                pars_i = int(2**(np.ceil(power)))
                pars_j = int(2**(np.floor(power)))
                #Represents the size of each partition along the i and j axes
                i_size = int(mat_size / pars_i)
                j_size = int(mat_size / pars_j)
                start = time.perf_counter()
                send_list = []
                for i in range(pars_i):
                    for j in range(pars_j):
                        send_list.append([
                            i * i_size, (i + 1) * i_size, j * j_size,
                            (j + 1) * j_size, mat_size
                        ])
                p = Pool(processes=no_cores,
                         initializer=init_worker,
                         initargs=(A, B))
                res_list = p.starmap(matrix_mult, send_list)
                p.close()
                p.join()
                result = np.vstack(
                    np.split(np.concatenate(res_list, axis=1), pars_i, axis=1))
                finish = time.perf_counter()
                time_taken = round(finish - start, 10)
                print(time_taken)
    print("")
    return None
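`init_worker` and `matrix_mult` are not shown. A sketch consistent with `initargs=(A, B)` and the reassembly logic above, where each task computes one (i, j) block of C = A @ B:

import numpy as np

# Hypothetical worker pair for the Pool above (not in the original source).
_shared = {}

def init_worker(A, B):
    # Stash the inherited shared buffers for use by matrix_mult.
    _shared['A'] = A
    _shared['B'] = B

def matrix_mult(i0, i1, j0, j1, mat_size):
    shape = (mat_size, mat_size)
    A_np = np.frombuffer(_shared['A'], dtype=np.float32).reshape(shape)
    B_np = np.frombuffer(_shared['B'], dtype=np.float32).reshape(shape)
    # One (i, j) block of C = A @ B.
    return A_np[i0:i1, :] @ B_np[:, j0:j1]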