import ctypes
from multiprocessing import sharedctypes

import numpy as np


def shared_array(shape=(1,), dtype=np.float32):
    np_type_to_ctype = {
        np.float32: ctypes.c_float,
        np.float64: ctypes.c_double,
        np.bool_: ctypes.c_bool,
        np.uint8: ctypes.c_ubyte,
        np.uint64: ctypes.c_ulonglong,
        np.complex128: ctypes.c_double,
        np.complex64: ctypes.c_float,
    }
    numel = int(np.prod(shape))
    # complex dtypes are stored as (real, imag) pairs of floats, so twice
    # as many ctypes elements are needed
    iscomplex = dtype in (np.complex128, np.complex64)
    arr_ctypes = sharedctypes.RawArray(np_type_to_ctype[dtype],
                                       numel * (1 + iscomplex))
    np_arr = np.frombuffer(arr_ctypes, dtype=dtype, count=numel)
    np_arr.shape = shape
    return np_arr
def make_array(shape=(1,), dtype=np.float32, shared=False, fill_val=None):
    np_type_to_ctype = {
        np.float32: ctypes.c_float,
        np.float64: ctypes.c_double,
        np.bool_: ctypes.c_bool,
        np.uint8: ctypes.c_ubyte,
        np.uint64: ctypes.c_ulonglong,
    }
    if not shared:
        np_arr = np.empty(shape, dtype=dtype)
    else:
        numel = int(np.prod(shape))
        arr_ctypes = sharedctypes.RawArray(np_type_to_ctype[dtype], numel)
        np_arr = np.frombuffer(arr_ctypes, dtype=dtype, count=numel)
        np_arr.shape = shape
    if fill_val is not None:
        np_arr[...] = fill_val
    return np_arr
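# A minimal usage sketch (added; not from the original sources): with the
# fork start method, child processes inherit the shared RawArray buffer,
# so writes made by a worker are visible in the parent without copying.
# Only make_array above is assumed; the rest is illustrative.
import multiprocessing as mp


def _fill_row(arr, row, value):
    arr[row, :] = value  # writes land in the shared RawArray buffer


if __name__ == '__main__':
    arr = make_array(shape=(2, 3), dtype=np.float32, shared=True, fill_val=0)
    procs = [mp.Process(target=_fill_row, args=(arr, r, r + 1.0))
             for r in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(arr)  # row 0 filled with 1.0, row 1 with 2.0 (under fork)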
def call_obliq_BY_fig2(body, incl, phi_prec):
    phi0 = 0
    nphi_Sun = 36  # number of solar positions
    nphi = 36      # number of spin positions
    nobliq = 60    # number of obliquities
    dobliq = np.pi / nobliq
    o_arr = np.zeros(nobliq)  # obliquities in degrees
    # to store torques; backed by a shared raw array
    tau_l_arr = np.ctypeslib.as_ctypes(np.zeros(nobliq))
    shared_array_tau = sharedctypes.RawArray(tau_l_arr._type_, tau_l_arr)
    for i in range(nobliq):
        obliquity = i * dobliq
        o_arr[i] = obliquity * 180 / np.pi
    for i in range(nobliq):
        obliquity = i * dobliq
        tau_BY_x, tau_BY_y, tau_BY_z, tau_l = compute_BY(
            body, obliquity, nphi, nphi_Sun, incl, phi0, phi_prec)
        tau_l_arr[i] = tau_l
    return o_arr, tau_l_arr
def evaluate(self):
    """Evaluate the system."""
    # acceleration global data:
    # initialisation of accelerations in a shared array so that the
    # pool workers can write into it
    tmp = ctypeslib.as_ctypes(zeros((len(self.bodies), 3)))
    data = sharedctypes.RawArray(tmp._type_, tmp)
    with Pool(processes=min(self.ncpus, len(self.bodies) - 1)) as p:
        p.map(self._acceleration, range(len(self.bodies)))
    self.acc = ctypeslib.as_array(data)
    data = None  # reset the shared array
    # energies and mass center
    self.Ek = self.kinetic_energy()
    self.Ep = self.potential_energy()
    self.C = self.mass_center()
def _create_examples(self):
    # Creates data if not yet created
    corpus = Corpus(self.data_path)
    name = 'valid' if self.data_type == self.Validation_Data else self.data_type
    data = getattr(corpus, name)

    # Make a shared array; access is read-only, so there is no need to lock
    self.data_ctypes = sharedctypes.RawArray('f', data)

    logger.info('Create info objects for the files')
    # For validation and test we only have one example.
    # For training we create multiple examples such that different parts
    # of the file will be used in one mini-batch.
    number_of_examples = 128 if self.data_type == BaseDataReader.Train_Data else 1
    self.examples.append([
        self.ExampleInfo(example_id=i,
                         data_ctypes=self.data_ctypes,
                         offset_size=self.offset_size,
                         random_mode=self.random_mode)
        for i in range(number_of_examples)
    ])

    # CV not implemented for now for this dataset
    logger.warning('For WikiText dataset separate validation set is provided, '
                   'currently cv_n and cv_k settings have no effect!')
def __init__(self, max_workers=None, const_args=[], shared_np_arrs=[]):
    '''
    Constructor
    '''
    super().__init__(max_workers)
    self._const_args = const_args
    shared_arrays_ctype = []
    shared_arrays_np = []
    # TODO: do not create a copy of a shared array if it already has a
    # suitable data structure
    for arr in shared_np_arrs:
        dtype = arr.dtype
        arrShared = np.empty(arr.size * dtype.itemsize, np.int8)
        arrShared = np.ctypeslib.as_ctypes(arrShared)
        ctypes_arr = sharedctypes.RawArray(arrShared._type_, arrShared)
        shared_arrays_ctype.append((ctypes_arr, arr.dtype, arr.shape))
        view = np.ctypeslib.as_array(ctypes_arr).view(arr.dtype).reshape(
            arr.shape)
        view[:] = arr
        shared_arrays_np.append(view)
    self._shared_arrays_np = shared_arrays_np
    self._shared_arrays = shared_arrays_ctype
def numpy_sweep(start_frequency=20.0,
                stop_frequency=20000.0,
                phase=0.0,
                interval=(0, 1.0),
                sampling_rate=48000.0,
                length=2 ** 16):
    """A pure NumPy implementation of the LinearSweep for benchmarking.
    See the LinearSweep class for documentation of the parameters.
    """
    # allocate shared memory for the channels
    array = sharedctypes.RawArray(ctypes.c_double, length)
    channels = numpy.frombuffer(array, dtype=numpy.float64).reshape((1, length))
    # compute some parameters
    start, stop = sumpf_internal.index(interval, length)
    sweep_offset = start / sampling_rate
    sweep_length = stop - start
    T = sweep_length / sampling_rate
    k = (stop_frequency - start_frequency) / T
    a = 2.0 * math.pi * start_frequency
    b = math.pi * k
    # the time values for the samples
    t = numpy.linspace(-sweep_offset,
                       (length - 1) / sampling_rate - sweep_offset,
                       length)
    # generate the sweep; the final sin writes into the shared buffer
    array = t * t
    array *= b
    array += a * t
    array += phase
    numpy.sin(array, out=channels[0, :])
    # fake store some additional values, because these values are actually
    # stored in the constructor of the sweep
    _ = start_frequency + k * (-sweep_offset / T)       # noqa: F841
    _ = start_frequency + k * ((T - sweep_offset) / T)  # noqa: F841
    return sumpf.Signal(channels=channels,
                        sampling_rate=sampling_rate,
                        offset=0,
                        labels=("Sweep",))
def numpy_sweep(start_frequency=20.0,
                stop_frequency=20000.0,
                phase=0.0,
                interval=(0, 1.0),
                sampling_rate=48000.0,
                length=2 ** 16):
    """A pure NumPy implementation of the ExponentialSweep for benchmarking.
    See the ExponentialSweep class for documentation of the parameters.
    """
    # allocate shared memory for the channels
    array = sharedctypes.RawArray(ctypes.c_double, length)
    channels = numpy.frombuffer(array, dtype=numpy.float64).reshape((1, length))
    # generate the sweep
    start, stop = sumpf_internal.index(interval, length)
    sweep_offset = float(start / sampling_rate)
    sweep_duration = (stop - start) / sampling_rate
    frequency_ratio = stop_frequency / start_frequency
    l = sweep_duration / math.log(frequency_ratio)
    a = 2.0 * math.pi * start_frequency * l
    t = numpy.linspace(-sweep_offset,
                       (length - 1) / sampling_rate - sweep_offset,
                       length)
    array = t
    array /= l
    numpy.expm1(array, out=array)
    array *= a
    array += phase
    numpy.sin(array, out=channels[0, :])
    # fake store some additional values, because these values are actually
    # stored in the constructor of the sweep
    _ = start_frequency * frequency_ratio ** (-sweep_offset / sweep_duration)                    # noqa: F841
    _ = start_frequency * frequency_ratio ** ((sweep_duration - sweep_offset) / sweep_duration)  # noqa: F841
    return sumpf.Signal(channels=channels,
                        sampling_rate=sampling_rate,
                        offset=0,
                        labels=("Sweep",))
def create(shape, dtype='d'):
    '''Create an uninitialised shared array. Avoid object arrays, as these
    will almost certainly break: the objects themselves won't be stored in
    shared memory, only the pointers.'''
    shape = numpy.atleast_1d(shape).astype('i')
    # we're going to use a flat ctypes array
    N = int(numpy.prod(shape))
    dtype = numpy.dtype(dtype)
    # if the dtype is relatively simple, create the corresponding ctypes
    # array; otherwise create a suitably sized byte array
    dt = dtype.char
    if dt not in sharedctypes.typecode_to_type.keys():
        dt = 'b'
        N *= dtype.itemsize
    a = sharedctypes.RawArray(dt, N)
    sa = shmarray(a, shape, dtype)
    return sa
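# A quick illustration (added; not from the original source) of the
# typecode fallback above: complex128 has no entry in
# sharedctypes.typecode_to_type, so it would be allocated as raw bytes
# and reinterpreted through numpy.
import numpy
from multiprocessing import sharedctypes

dtype = numpy.dtype('complex128')
assert dtype.char not in sharedctypes.typecode_to_type
raw = sharedctypes.RawArray('b', 4 * dtype.itemsize)  # room for 4 values
view = numpy.frombuffer(raw, dtype=dtype)             # zero-copy, shape (4,)
view[:] = 1 + 2j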
def _allocate_shared_memory(self):
    if self.measure_required():
        self.mem_bulk = None
    else:
        self.mem_bulk = \
            sharedctypes.RawArray('b', self.batch_size * self.mem_size)
xlist.append(x_list)
ylist.append(y_list)
EGM08.append(egm08)
EGM96.append(egm96)
#-----------------------
# flatten the nested lists
river = [flatten for inner in river for flatten in inner]
pname = [flatten for inner in pname for flatten in inner]
xlist = [flatten for inner in xlist for flatten in inner]
ylist = [flatten for inner in ylist for flatten in inner]
EGM08 = [flatten for inner in EGM08 for flatten in inner]
EGM96 = [flatten for inner in EGM96 for flatten in inner]
pnum = len(pname)
#########################################################
opn = np.ctypeslib.as_ctypes(np.zeros([N, pm.ens_mem(), pnum], np.float32))
shared_array_opn = sharedctypes.RawArray(opn._type_, opn)
asm = np.ctypeslib.as_ctypes(np.zeros([N, pm.ens_mem(), pnum], np.float32))
shared_array_asm = sharedctypes.RawArray(asm._type_, asm)
# build the input list for the parallel calculation
inputlist = []
for day in np.arange(start, last):
    target_dt = start_dt + datetime.timedelta(days=day)
    yyyy = '%04d' % target_dt.year
    mm = '%02d' % target_dt.month
    dd = '%02d' % target_dt.day
    for num in np.arange(1, pm.ens_mem() + 1):
        numch = '%03d' % num
        inputlist.append([yyyy, mm, dd, numch])
"""Demo of shared c-types with numpy""" import multiprocessing as mp from multiprocessing import sharedctypes from numpy import ctypeslib def fill_array(arr, value): arr.fill(value) if __name__ == '__main__': # Create an array of integers on c-level raw_array = sharedctypes.RawArray('i', 4) # Convert the raw array into a numpy c-object array = ctypeslib.as_array(raw_array) # Reshape in-place array.shape = (2, 2) # Create two processes which write on different rows of `array` process1 = mp.Process(target=fill_array, args=(array[0, :], 5)) process2 = mp.Process(target=fill_array, args=(array[1, :], 7)) # Start both processes and wait for them to finish process1.start() process2.start() process1.join()
vector = np.random.rand(N).astype(np.float64)
# Initial solution: all zeros
x_calc = np.zeros(vector.shape, dtype=np.float64)
# x_true = np.linalg.solve(matrix, vector)

start_time = time.time()

# Prepare coefficients: row i becomes -a_ij/a_ii with b_i/a_ii on the diagonal
diag_coefs = np.diagonal(matrix)
for i in range(N):
    matrix[i] = matrix[i] / diag_coefs[i] * -1
matrix[np.diag_indices(N, 2)] = vector / diag_coefs

# Prepare to share among processes
sc_matrix = sharedctypes.RawArray('d', matrix.reshape(-1))
sc_vector = sharedctypes.RawArray('d', vector.reshape(-1))
sc_x_calc = sharedctypes.RawArray('d', x_calc.reshape(-1))

processes = []
for proc_id in range(PROC_COUNT):
    worker_args = (sc_matrix, sc_vector, sc_x_calc, proc_id)
    process = Process(target=worker_func, args=worker_args)
    processes.append(process)
    process.start()
for process in processes:
    process.join()

end_time = time.time()
duration = end_time - start_time
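# worker_func is referenced above but not defined in this snippet. A
# minimal sketch of what a Jacobi-style worker could look like; the
# proc_count and iteration defaults are illustrative (proc_count would
# match PROC_COUNT above), and no convergence test is shown.
import numpy as np


def worker_func(sc_matrix, sc_vector, sc_x_calc, proc_id,
                proc_count=4, iterations=100):
    # re-wrap the shared RawArrays as numpy views (no copy is made)
    m = np.frombuffer(sc_matrix, dtype=np.float64)
    n = int(np.sqrt(m.size))
    m = m.reshape(n, n)
    x = np.frombuffer(sc_x_calc, dtype=np.float64)
    for _ in range(iterations):
        # each process updates an interleaved subset of the unknowns;
        # row i of m already holds -a_ij/a_ii with b_i/a_ii on the
        # diagonal, so one update is a dot product plus the diagonal term.
        # In-place updates on the shared vector make this an asynchronous
        # Gauss-Seidel style sweep rather than strict Jacobi.
        for i in range(proc_id, n, proc_count):
            x[i] = m[i].dot(x) - m[i, i] * x[i] + m[i, i]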
# Load the original data
data = np.load('trj_displacement.npy')  # np.load(str(sys.argv[1]))
data = get_norm(data)
nframes = data.shape[0]  # number of frames or conformations
natoms = data.shape[1]   # number of atoms
#-----------------------------------------------------------------------------
# Arguments of get_norm
list_of_pairs = [(N, M) for N in range(natoms) for M in range(N)]

# Initialize the correlation matrix
corr_matrix = np.ctypeslib.as_ctypes(np.zeros((natoms, natoms)))

# Shared array that the parallel function writes into
shared_array = sharedctypes.RawArray(corr_matrix._type_, corr_matrix)

# Apply a parallel map of the function over the given array
p2 = multiprocessing.Pool()
p2.map(count_knn, list_of_pairs)

# Return the map into an n-dimensional array
corr_matrix = np.ctypeslib.as_array(shared_array)
#-----------------------------------------------------------------------------
# Apply the generalized correlation coefficient
knn = (1. - np.exp(-2. * corr_matrix)) ** 0.5
# Save the resulting matrix to a .npy file
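# count_knn is consumed by the Pool above but not defined in this
# snippet. A minimal sketch of the worker pattern it would follow,
# assuming the fork start method so the module-level shared_array and
# data are inherited; the statistic below is a stand-in, the real code
# estimates mutual information with a k-nearest-neighbour method.
def count_knn(pair):
    N, M = pair
    tmp = np.ctypeslib.as_array(shared_array)  # zero-copy view of the RawArray
    value = abs(np.corrcoef(data[:, N], data[:, M])[0, 1])  # placeholder statistic
    tmp[N, M] = tmp[M, N] = value  # the correlation matrix is symmetric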
if not os.path.exists(args.output):
    os.makedirs(args.output)
OUTPUT = args.output
THREADS = args.threads
BLOCK_SIZE = args.block_size

s = parse_mcl_dump_file(args.mcl_dump)
t = load_gene_map(args.gene_map)
u = add_pc_labels(s, t)
PC_DF = create_composition_mtx(u, presence_absence=True)

pc_result = np.ctypeslib.as_ctypes(np.zeros((PC_DF.shape[0], PC_DF.shape[0])))
hyper_result = np.ctypeslib.as_ctypes(
    np.zeros((PC_DF.shape[0], PC_DF.shape[0])))
SHARED_PC_ARRAY = sharedctypes.RawArray(pc_result._type_, pc_result)
SHARED_HYPER_ARRAY = sharedctypes.RawArray(hyper_result._type_, hyper_result)

SHARED_MTX = calculate_shared_matrix()
PC_COUNTS = PC_DF.sum(axis=1)
hypergeometric_df = calculate_hypergeometric_survival()
hypergeometric_df.to_csv('%s/hypergeometric.survival.txt' % OUTPUT, sep='\t')
df = hypergeometric_df.where(
    np.triu(np.ones(hypergeometric_df.shape)).astype(bool))
hypergeometric_df = pd.read_csv('%s/hypergeometric.survival.txt' % OUTPUT,
                                sep='\t')
df = df.stack()
df.to_csv('%s/hypergeometric.survival.long.txt' % OUTPUT, sep='\t')
full_rmsd = np.zeros((n_nitrogen, n_oxygen))
S = full_rmsd

print("Scanning", n_total, "energies", "using", WORKERS, "cpu's")

# for i in range(n_nitrogen):
#     for j in range(n_oxygen):

# TODO
# http://briansimulator.org/sharing-numpy-arrays-between-processes
from multiprocessing import sharedctypes

size = S.size
shape = S.shape
S.shape = size
S_ctypes = sharedctypes.RawArray('d', S)
S = np.frombuffer(S_ctypes, dtype=np.float64, count=size)
S.shape = shape

from numpy import ctypeslib


def worker(id, job):
    """worker function for MP"""
    S = ctypeslib.as_array(S_ctypes)
    S.shape = shape
    for i in job:
        for j in range(n_oxygen):
            N = scan_nitrogen[i]
def __init__(self, source, bufferlen=5, name=None,
             send_data_to_sink_manager=False, **kwargs):
    '''
    Parameters
    ----------
    source : class
        Lower-level class for interacting directly with the incoming
        data (e.g., plexnet)
    bufferlen : int
        Constrains the maximum amount of data history stored by the source
    name : string, optional, default=None
        Name of the sink, i.e., HDF table. If one is not provided, it will
        be inferred based on the name of the source module
    send_data_to_sink_manager : boolean, optional, default=False
        Flag to indicate whether data should be saved to a sink
        (e.g., HDF file)
    kwargs : dict, optional, default={}
        For the multi-channel data source, you MUST specify a 'channels'
        keyword argument. Note that kwargs['channels'] does not need to be
        a list of integers; it can also be a list of strings.
    '''
    super(MultiChanDataSource, self).__init__()
    if name is not None:
        self.name = name
    else:
        self.name = source.__module__.split('.')[-1]
    self.filter = None
    self.source = source
    self.source_kwargs = kwargs
    self.bufferlen = bufferlen
    self.max_len = int(bufferlen * self.source.update_freq)
    self.channels = kwargs['channels']
    self.chan_to_row = dict()
    for row, chan in enumerate(self.channels):
        self.chan_to_row[chan] = row

    self.n_chan = len(self.channels)
    dtype = self.source.dtype  # e.g., np.dtype('float') for LFP
    self.slice_size = dtype.itemsize
    self.idxs = shm.RawArray('l', self.n_chan)
    self.last_read_idxs = np.zeros(self.n_chan)
    rawarray = shm.RawArray('c', self.n_chan * self.max_len * self.slice_size)
    self.data = np.frombuffer(rawarray, dtype).reshape(
        (self.n_chan, self.max_len))
    #self.fo2 = open('/storage/rawdata/test_rda_get.txt','w')
    #self.fo3 = open('/storage/rawdata/test_rda_run.txt','w')

    self.lock = mp.Lock()
    self.pipe, self._pipe = mp.Pipe()
    self.cmd_event = mp.Event()
    self.status = mp.Value('b', 1)
    self.stream = mp.Event()
    self.data_has_arrived = mp.Value('b', 0)

    self.methods = set(n for n in dir(source)
                       if inspect.ismethod(getattr(source, n)))

    self.send_data_to_sink_manager = send_data_to_sink_manager
    if self.send_data_to_sink_manager:
        self.send_to_sinks_dtype = np.dtype(
            [('chan' + str(chan), dtype) for chan in kwargs['channels']])
        self.next_send_idx = mp.Value('l', 0)
        self.wrap_flags = shm.RawArray('b', self.n_chan)  # zeros/Falses by default
        self.supp_hdf_file = kwargs['supp_file']
def fit(self, X, y, num_samples, num_features, loss_per_epoch=10):
    p = self.params
    if self.w is None:
        self.w = np.random.normal(0, INIT_WEIGHT_STD, size=(num_features,))

    def worker_fit(id_w, num_workers, X_w, y_w, weights_w, shape, indices,
                   counter, start_barrier, params_w):
        assert params_w.regularizer is not None
        # reconstruct numpy shared arrays
        num_samples, num_features = shape
        weights_w = ctypeslib.as_array(weights_w)
        weights_w.shape = (num_features,)
        if not isspmatrix(X_w):
            X_w = ctypeslib.as_array(X_w)
            X_w.shape = (num_samples, num_features)
            y_w = ctypeslib.as_array(y_w)
            y_w.shape = (num_samples,)

        memory = GradientMemory(take_k=params_w.take_k,
                                take_top=params_w.take_top,
                                with_memory=params_w.with_memory)

        start_barrier.wait()
        while True:
            with counter.get_lock():
                idx = counter.value
                counter.value += 1
            if idx >= num_samples * params_w.num_epoch:
                break
            sample_idx = indices[idx]
            epoch = idx // num_samples
            iteration = idx % num_samples
            lr = self.lr(epoch, iteration, num_samples, num_features)

            x = X_w[sample_idx]
            if isspmatrix(x):
                minus_grad = -1. * params_w.regularizer * weights_w
                sparse_minus_grad = y[sample_idx] * x * sigmoid(
                    -y[sample_idx] * x.dot(weights_w).squeeze(0))
                minus_grad[sparse_minus_grad.indices] += sparse_minus_grad.data
            else:
                minus_grad = y[sample_idx] * x * sigmoid(
                    -y[sample_idx] * x.dot(weights_w))
                minus_grad -= params_w.regularizer * weights_w

            sparse = params_w.take_k and (params_w.take_k < num_features)
            lr_minus_grad = memory(lr * minus_grad, sparse=sparse)
            if sparse:
                weights_w[lr_minus_grad[0]] += lr_minus_grad[1]
            else:
                weights_w += lr_minus_grad

    with mp.Manager() as manager:
        counter = mp.Value('i', 0)
        # wait for all workers and the monitor to be ready
        start_barrier = manager.Barrier(p.n_cores + 1)

        indices = np.zeros((p.num_epoch, num_samples), dtype=int)
        for i in range(p.num_epoch):
            indices[i] = np.arange(num_samples)
            np.random.shuffle(indices[i])
        indices = indices.flatten()

        weights_w = sharedctypes.RawArray('d', self.w)
        self.w = ctypeslib.as_array(weights_w)
        self.w.shape = (num_features,)

        if isspmatrix(X):
            X_w = X
            y_w = y
        else:
            X_w = sharedctypes.RawArray('d', np.ravel(X))
            y_w = sharedctypes.RawArray('d', y)

        processes = [mp.Process(target=worker_fit,
                                args=(i, p.n_cores, X_w, y_w, weights_w,
                                      X.shape, indices, counter,
                                      start_barrier, self.params))
                     for i in range(p.n_cores)]
        for proc in processes:
            proc.start()

        # monitor the progress
        print_every = num_samples // loss_per_epoch
        next_print = 0

        # loss computed on another process, fed through the queue
        stop = manager.Value('b', False)
        w_queue = mp.Queue()
        results = manager.dict()

        def loss_computer(q, regularizer, res, stop):  # should be stoppable
            print('start loss computer')
            losses = []
            iters = []
            timers = []
            while not q.empty() or not stop.value:
                try:
                    epoch, iter_, total_iter, chrono, w = q.get(block=True,
                                                                timeout=1)
                except queue.Empty:
                    continue
                loss = np.sum(np.log(1 + np.exp(-y * (X @ w)))) / X.shape[0]
                if regularizer is not None:
                    loss += regularizer * np.square(w).sum() / 2
                timers.append(chrono)
                losses.append(loss)
                iters.append(total_iter)
                print("epoch {} iteration {} loss {} time {}s".format(
                    epoch, iter_, loss, chrono))
            res['losses'] = np.array(losses)
            res['iters'] = np.array(iters)
            res['timers'] = np.array(timers)

        start_barrier.wait()
        start_time = time.time()
        loss_proc = mp.Process(target=loss_computer,
                               args=(w_queue, self.params.regularizer,
                                     results, stop))
        loss_proc.start()

        while counter.value < self.params.num_epoch * num_samples:
            if counter.value > next_print:
                w_copy = (self.w_estimate
                          if self.w_estimate is not None else self.w).copy()
                epoch = next_print // num_samples
                iter_ = next_print % num_samples
                chrono = time.time() - start_time
                w_queue.put((epoch, iter_, next_print, chrono, w_copy))
                next_print += print_every
            else:
                time.sleep(.1)

        stop.value = True  # stop the loss computer
        for proc in processes:
            proc.join()
        loss_proc.join()

        print(results)
        return results['iters'], results['timers'], results['losses']
x = np.logspace(-2, 1.1, l)
y = 5.1
z = 5.1
np.savez('variables', T=T, y=y, z=z, x=x)
Tgrid, xgrid = np.meshgrid(T, y)
err = np.zeros([l, l])

size = l
block_size = 5
result = np.ctypeslib.as_ctypes(np.zeros((size, size)))
shared_array = sharedctypes.RawArray(result._type_, result)


def fill_per_window(args):
    window_x, window_y = args
    tmp = np.ctypeslib.as_array(shared_array)
    start = time()
    print('Block started:', args)
    for idx_x in range(window_x, window_x + block_size):
        for idx_y in range(window_y, window_y + block_size):
            tmp[idx_x, idx_y] = Fisher_ns(T[idx_y], x[idx_x], y, z,
                                          estimate='spec')
    end = time()
    print('Block completed:', args)
    print('Time taken:', end - start)


# enumerate the block origins, one per window of block_size x block_size
window_idxs = [(i, j) for i, j in
               itertools.product(range(0, size, block_size),
                                 range(0, size, block_size))]
def fit_until(self, X, y, num_samples, num_features, baseline=None):
    # num_samples, num_features = X.shape
    p = self.params
    if self.w is None:
        self.w = np.random.normal(0, INIT_WEIGHT_STD, size=(num_features,))

    def worker_fit(id_w, num_workers, X_w, y_w, weights_w, shape, indices,
                   results, params_w, stopper):
        # reconstruct numpy shared arrays
        num_samples, num_features = shape
        weights_w = ctypeslib.as_array(weights_w)
        weights_w.shape = (num_features,)
        if not isspmatrix(X_w):
            X_w = ctypeslib.as_array(X_w)
            X_w.shape = (num_samples, num_features)
            y_w = ctypeslib.as_array(y_w)
            y_w.shape = (num_samples,)

        memory = GradientMemory(take_k=params_w.take_k,
                                take_top=params_w.take_top,
                                with_memory=params_w.with_memory)

        if id_w == 0:
            losses = np.zeros(params_w.num_epoch * LOSS_PER_EPOCH + 1)
            losses[0] = self.loss(X, y)
            start_time = time.time()
            last_printed = 0
            loss_every = num_samples // LOSS_PER_EPOCH

        for epoch in range(params_w.num_epoch):
            for iteration in range(id_w, num_samples, num_workers):
                # worker 0 gave the stop signal: reached accuracy
                if stopper.value:
                    return
                sample_idx = indices[epoch][iteration]
                lr = self.lr(epoch, iteration, num_samples, num_features)

                x = X_w[sample_idx]
                if isspmatrix(x):
                    x = np.array(x.todense()).squeeze(0)
                minus_grad = y[sample_idx] * x * sigmoid(
                    -y[sample_idx] * np.dot(x, self.w))
                # minus_grad = -x * (pred_proba - y_w[sample_idx])
                if params_w.regularizer:
                    minus_grad -= 2 * params_w.regularizer * weights_w

                sparse = params_w.take_k and (params_w.take_k < num_features)
                # next_real -= 1
                lr_minus_grad = memory(lr * minus_grad, sparse=sparse)
                # , no_apply=(next_real != 0))
                # if next_real == 0:
                #     next_real = params_w.real_update_every
                if sparse:
                    weights_w[lr_minus_grad[0]] += lr_minus_grad[1]
                else:
                    weights_w += lr_minus_grad

                if (id_w == 0 and
                        num_samples * epoch + iteration - last_printed >= loss_every):
                    last_printed = num_samples * epoch + iteration
                    timing = time.time() - start_time
                    loss = self.loss(X, y)
                    losses[epoch * LOSS_PER_EPOCH + (iteration // loss_every) + 1] = loss
                    print("epoch {} iter {} loss {} time {}s".format(
                        epoch, iteration, loss, timing))

                    if baseline and loss <= baseline:
                        stopper.value = True
                        results['epoch'] = epoch
                        results['losses'] = losses
                        results['iteration'] = iteration
                        results['timing'] = timing
                        return

        # if the run failed to converge...
        if id_w == 0:
            results['epoch'] = epoch
            results['losses'] = losses
            results['iteration'] = iteration
            results['timing'] = time.time() - start_time

    with mp.Manager() as manager:
        results = manager.dict()
        stopper = manager.Value('b', False)

        indices = np.zeros((p.num_epoch, num_samples), dtype=int)
        for i in range(p.num_epoch):
            indices[i] = np.arange(num_samples)
            np.random.shuffle(indices[i])

        weights_w = sharedctypes.RawArray('d', self.w)
        self.w = ctypeslib.as_array(weights_w)
        self.w.shape = (num_features,)

        if isspmatrix(X):
            X_w = X
            y_w = y
        else:
            X_w = sharedctypes.RawArray('d', np.ravel(X))
            y_w = sharedctypes.RawArray('d', y)

        processes = [mp.Process(target=worker_fit,
                                args=(i, p.n_cores, X_w, y_w, weights_w,
                                      X.shape, indices, results,
                                      self.params, stopper))
                     for i in range(p.n_cores)]
        for proc in processes:
            proc.start()
        for proc in processes:
            proc.join()

        print(results)
        return (results['timing'], results['epoch'], results['iteration'],
                results['losses'])
def main():
    # Set global variables (to be used in SMART)
    global p, A, normA, shared_array

    # Load the Shepp-Logan phantom
    phantom = loadmat('phantom.mat')['phantom256']
    size = phantom.shape[0]

    # Define the number of cameras
    numViews = 10

    # Create geometries and projector
    proj_geom = astra.create_proj_geom('parallel', 1.0, size,
                                       np.linspace(0, 2 * np.pi, numViews))
    vol_geom = astra.create_vol_geom(size, size)
    proj_id = astra.create_projector('linear', proj_geom, vol_geom)
    matrix_id = astra.projector.matrix(proj_id)

    # Retrieve the phantom system matrix as a compressed sparse row array
    A = astra.matrix.get(matrix_id)

    # Create the forward projection
    [sinogram_id, sinogram] = astra.create_sino(phantom, proj_id)

    # Set up the projection matrix as an opTomo object
    W = astra.optomo.OpTomo(proj_id)

    # Get the squared L2-norm of the projection matrix
    normA = np.asarray(A.multiply(A).sum(axis=1)).squeeze()

    # Prepare the projections into a ravelled array
    p = sinogram.ravel()

    # Number of rays per projection
    raysPerProj = len(p) // numViews

    # Get the indices of all the valid voxels using MLOS
    vInit, validVoxels = MLOS(numViews, p, raysPerProj, A)
    print('Elapsed time (MLOS): {0} s'.format(time.thread_time()))

    # Set up objects for parallelization
    v_recon = np.ctypeslib.as_ctypes(vInit.ravel())
    shared_array = sharedctypes.RawArray(v_recon._type_, v_recon)

    # Set up a process pool
    #prc = mp.Process(target=SMART, args=(validVoxels, shared_array))
    #prc.start()
    #prc.join()

    # Set up the pool and run the processing
    #p = mp.Pool()
    #res = p.map(SMART, validVoxels[np.newaxis, :].T)
    SMART2(validVoxels[np.newaxis, :].T)
    v_recon = np.ctypeslib.as_array(shared_array)
    print('Elapsed time (SMART): {0} s'.format(time.thread_time()))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 8))
    ax1.imshow(phantom)
    ax1.set_title('Original')
    ax2.imshow(v_recon.reshape(size, size))
    ax2.set_title('SMART')
    ax1.axis('off')
    ax2.axis('off')
    fig.savefig('v_recon.png')
def _mk_RawArray(shape, dtype):
    size = six.moves.reduce(lambda x, y: x * y, shape, 1)
    dtype = np.dtype(dtype)
    return sharedctypes.RawArray('B', size * dtype.itemsize)
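# A usage sketch (added for illustration): the byte-typed RawArray
# returned above can be reinterpreted as any dtype through a zero-copy
# numpy view. The helper name _as_ndarray is hypothetical, not part of
# the original code.
import numpy as np


def _as_ndarray(raw, shape, dtype):
    return np.frombuffer(raw, dtype=dtype).reshape(shape)


raw = _mk_RawArray((3, 4), 'float64')
view = _as_ndarray(raw, (3, 4), 'float64')
view[:] = 0.0  # writes go straight into the shared buffer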
arr = ds_ann_max.values.copy()
good_index = np.apply_along_axis(lambda x: (x == 0).all(), arr=arr, axis=0)
idx = np.where(good_index)
bad_idx = list(zip(*idx))

# run in parallel
indexes = list(np.ndindex(arr.shape[-2:]))

# make the output arrays
rows, cols = arr.shape[1:]
out = np.ctypeslib.as_ctypes(
    np.zeros((len(lmom_list), rows, cols), dtype=np.float64))
out_shared = sharedctypes.RawArray(out._type_, out)

p = mp.Pool(ncpus)
p.map(run_par_update_arr, indexes)
p.close()
p.join()

# bring these c-types arrays back to numpy arrays
out = np.ctypeslib.as_array(out_shared).astype(np.float32)
# update the np.nans to something more useable and SNAP-ish
out[np.isnan(out)] = -9999
out_data = out_data + [out]
new_arr = np.array(out_data)
def generate_signal(d, A, amp_max, cc_multi):
    start_signal_generation = time_lib.perf_counter()
    print(d["Signal_type"], " with repetition rate ", d["freq"], " Hz and ",
          np.round(d["T"] * 1000, 8), " ms pulse width")

    Sim_time = 1.0 / d["freq"]  # always one pulse per simulation
    freq_max = d["freq"] * Sim_time / d["t_step"]
    print("Max frequency in the spectrum: ", freq_max / 2.0, "\n")
    FR_vector_signal = np.arange(0.0, freq_max, d["freq"])
    n_time_max = int(Sim_time / d["t_step"])
    t = [d["t_step"] * x for x in range(n_time_max)]

    # generate the signal from its analytical formulation (by Trieu)
    II = np.pi
    phi = d["phi"]  # signal shift in sec
    w0 = 2 * II * d["freq"]
    pw = d["T"]     # pulse width
    Nmax = FR_vector_signal.shape[0]

    signal_out = np.ctypeslib.as_ctypes(np.zeros(len(t), float))
    global shared_array
    shared_array = sharedctypes.RawArray(signal_out._type_, signal_out)

    Hf_signal = np.zeros(Nmax - 1, dtype=complex)
    for k in range(1, Nmax):
        if d["Signal_type"] == 'Increasing Ramp':  # ascending ramp
            Hf_zero = A * pw / 2 / Sim_time  # Hf1 at k=0
            Hf_signal[k - 1] = 2 * A / (Sim_time * pw) * (
                pw * np.exp(1j * w0 * k * pw) / (1j * w0 * k)
                + (np.exp(1j * w0 * k * pw) - 1) / (w0 * k) ** 2)
        elif d["Signal_type"] == 'Decreasing Ramp':  # descending ramp
            Hf_zero = A * pw / 2 / Sim_time  # Hf2 at k=0
            Hf_signal[k - 1] = -2 * A / (Sim_time * pw) * (
                pw / (1j * w0 * k)
                + (np.exp(1j * w0 * k * pw) - 1) / (w0 * k) ** 2)
        elif d["Signal_type"] == 'Central Triangle':  # central triangular
            Hf_zero = A * pw / 2 / Sim_time  # Hf3 at k=0
            Hf_signal[k - 1] = 4 * A / (Sim_time * pw) * (
                (2 * np.exp(1j * w0 * k * pw / 2)
                 - np.exp(1j * w0 * k * pw) - 1) / (w0 * k) ** 2)
        elif d["Signal_type"] == 'Rectangle':  # rectangular
            Hf_zero = A * pw / Sim_time  # Hf4 at k=0
            Hf_signal[k - 1] = 2 * A / (Sim_time * 1j * w0 * k) * (
                np.exp(1j * w0 * k * pw) - 1)

    p = Pool()
    time_ind = np.arange(len(t))
    res = p.map(partial(get_vector_in_time, Hf_zero, Hf_signal, w0, Nmax,
                        phi, d["t_step"], n_time_max), time_ind)
    signal_out = np.ctypeslib.as_array(shared_array)
    p.terminate()
    del Hf_signal
    signal_out_real = signal_out.real

    ## to construct the signal manually (quick approach, but the signal is
    ## almost "untruncatable")
    #signal_out_real = manual_signal_out_generator(d, t, A)

    plt.figure(11111231)
    if d["current_control"] == 1 and not cc_multi:
        plt.plot(t, signal_out_real)
    else:
        signal_out_scaled = [i * amp_max for i in signal_out_real]
        plt.plot(t, signal_out_scaled)
    plt.xlim(0.000, d["T"] * 5)
    plt.grid(True)
    plt.xlabel('t, sec')
    plt.ylabel('Signal amplitude (A or V)')
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.savefig(os.environ['PATIENTDIR'] + '/Images/Signal.png',
                format='png', dpi=750)

    t = np.asarray(t)
    # get a Fourier transform of the signal with np.fft.fft and recover it
    # with np.fft.ifft to check
    Fr_vect, Xs_vect = numpy_analog_digit_converter(
        t, signal_out_real, d["freq"], FR_vector_signal.shape[0], d["T"])

    #========== Plots (these take time to generate) ==========================#
    # plt.figure(11)
    # plt.stem(Fr_vect, np.real(Xs_vect), markerfmt=" ")
    # plt.xscale("log")
    # plt.xlabel('Frequency, Hz')
    # plt.ylabel('Real part')
    # plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # plt.savefig('Images/FT_real.png', format='png', dpi=1000)

    # plt.figure(12)
    # plt.stem(Fr_vect, np.imag(Xs_vect), markerfmt=" ")
    # plt.xscale("log")
    # plt.xlabel('Frequency, Hz')
    # plt.ylabel('Imaginary part')
    # plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # plt.savefig('Images/FT_imag.png', format='png', dpi=1000)

    # '''Maybe we want to use fewer frequencies'''
    # plt.figure(111342)
    # plt.stem(Fr_vect, np.absolute(Xs_vect), markerfmt=" ", linefmt='C0',
    #          basefmt="C0-")
    # plt.xscale("log")
    # plt.xlabel('Frequency, Hz')
    # plt.ylabel('Amplitude')
    # plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # plt.savefig('Images/FT_full_ampl.eps', format='eps', dpi=1000)

    minutes = int((time_lib.perf_counter() - start_signal_generation) / 60)
    seconds = int(time_lib.perf_counter() - start_signal_generation) - minutes * 60
    print("----- Signal generation took ", minutes, " min ", seconds, " s -----")

    return t, signal_out_real, Xs_vect, Fr_vect
def overlap(cibra, ciket, nmo, nocc, s=None):
    '''Overlap between two CISD wavefunctions.

    Args:
        s : 2D array
            The overlap matrix of the non-orthogonal one-particle basis
    '''
    if s is None:
        return dot(cibra, ciket, nmo, nocc)

    DEBUG = True

    nvir = nmo - nocc
    nov = nocc * nvir
    bra0, bra1, bra2 = cisdvec_to_amplitudes(cibra, nmo, nocc)
    ket0, ket1, ket2 = cisdvec_to_amplitudes(ciket, nmo, nocc)

    # Sort the ket orbitals so that the orbitals in bra map one-to-one to
    # the orbitals in ket.
    if ((not DEBUG) and
            abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2 and
            abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
        ket_orb_idx = numpy.where(abs(s) > 0.9)[1]
        s = s[:, ket_orb_idx]
        oidx = ket_orb_idx[:nocc]
        vidx = ket_orb_idx[nocc:] - nocc
        ket1 = ket1[oidx[:, None], vidx]
        ket2 = ket2[oidx[:, None, None, None], oidx[:, None, None],
                    vidx[:, None], vidx]

    ooidx = numpy.tril_indices(nocc, -1)
    vvidx = numpy.tril_indices(nvir, -1)
    bra2aa = bra2 - bra2.transpose(1, 0, 2, 3)
    bra2aa = lib.take_2d(bra2aa.reshape(nocc**2, nvir**2),
                         ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])
    ket2aa = ket2 - ket2.transpose(1, 0, 2, 3)
    ket2aa = lib.take_2d(ket2aa.reshape(nocc**2, nvir**2),
                         ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])

    occlist0 = numpy.arange(nocc).reshape(1, nocc)
    occlists = numpy.repeat(occlist0, 1 + nov + bra2aa.size, axis=0)
    occlist0 = occlists[:1]
    occlist1 = occlists[1:1 + nov]
    occlist2 = occlists[1 + nov:]

    ia = 0
    for i in range(nocc):
        for a in range(nocc, nmo):
            occlist1[ia, i] = a
            ia += 1

    ia = 0
    for i in range(nocc):
        for j in range(i):
            for a in range(nocc, nmo):
                for b in range(nocc, a):
                    occlist2[ia, i] = a
                    occlist2[ia, j] = b
                    ia += 1

    na = len(occlists)
    if DEBUG:
        trans = numpy.empty((na, na))
        for i, idx in enumerate(occlists):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlists]
            trans[i, :] = numpy.linalg.det(minors)

        # Mimic the transformation einsum('ab,ap->pb', FCI, trans).
        # The wavefunction FCI has the [excitation_alpha, excitation_beta]
        # representation.  The zero blocks like FCI[S_alpha, D_beta],
        # FCI[D_alpha, D_beta], are explicitly excluded.
        bra_mat = numpy.zeros((na, na))
        bra_mat[0, 0] = bra0
        bra_mat[0, 1:1 + nov] = bra_mat[1:1 + nov, 0] = bra1.ravel()
        bra_mat[0, 1 + nov:] = bra_mat[1 + nov:, 0] = bra2aa.ravel()
        bra_mat[1:1 + nov, 1:1 + nov] = bra2.transpose(0, 2, 1, 3).reshape(nov, nov)
        ket_mat = numpy.zeros((na, na))
        ket_mat[0, 0] = ket0
        ket_mat[0, 1:1 + nov] = ket_mat[1:1 + nov, 0] = ket1.ravel()
        ket_mat[0, 1 + nov:] = ket_mat[1 + nov:, 0] = ket2aa.ravel()
        ket_mat[1:1 + nov, 1:1 + nov] = ket2.transpose(0, 2, 1, 3).reshape(nov, nov)
        ovlp = lib.einsum('ab,ap,bq,pq->', bra_mat, trans, trans, ket_mat)
    else:
        nov1 = 1 + nov
        noovv = bra2aa.size
        bra_SS = numpy.zeros((nov1, nov1))
        bra_SS[0, 0] = bra0
        bra_SS[0, 1:] = bra_SS[1:, 0] = bra1.ravel()
        bra_SS[1:, 1:] = bra2.transpose(0, 2, 1, 3).reshape(nov, nov)
        ket_SS = numpy.zeros((nov1, nov1))
        ket_SS[0, 0] = ket0
        ket_SS[0, 1:] = ket_SS[1:, 0] = ket1.ravel()
        ket_SS[1:, 1:] = ket2.transpose(0, 2, 1, 3).reshape(nov, nov)

        trans_SS = numpy.empty((nov1, nov1))
        trans_SD = numpy.empty((nov1, noovv))
        trans_DS = numpy.empty((noovv, nov1))
        occlist01 = occlists[:nov1]
        for i, idx in enumerate(occlist01):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlist01]
            trans_SS[i, :] = numpy.linalg.det(minors)
            minors = s_sub[occlist2]
            trans_SD[i, :] = numpy.linalg.det(minors)
            s_sub = s[:, idx].copy()
            minors = s_sub[occlist2]
            trans_DS[:, i] = numpy.linalg.det(minors)

        ovlp = lib.einsum('ab,ap,bq,pq->', bra_SS, trans_SS, trans_SS, ket_SS)
        ovlp += lib.einsum('ab,a ,bq, q->', bra_SS, trans_SS[:, 0],
                           trans_SD, ket2aa.ravel())
        ovlp += lib.einsum('ab,ap,b ,p ->', bra_SS, trans_SD,
                           trans_SS[:, 0], ket2aa.ravel())
        ovlp += lib.einsum(' b, p,bq,pq->', bra2aa.ravel(), trans_SS[0, :],
                           trans_DS, ket_SS)
        ovlp += lib.einsum(' b, p,b ,p ->', bra2aa.ravel(), trans_SD[0, :],
                           trans_DS[:, 0], ket2aa.ravel())
        ovlp += lib.einsum('a ,ap, q,pq->', bra2aa.ravel(), trans_DS,
                           trans_SS[0, :], ket_SS)
        ovlp += lib.einsum('a ,a , q, q->', bra2aa.ravel(), trans_DS[:, 0],
                           trans_SD[0, :], ket2aa.ravel())

        # FIXME: whether to approximate the overlap between double
        # excitation coefficients
        if numpy.linalg.norm(bra2aa) * numpy.linalg.norm(ket2aa) < 1e-4:
            # Skip the overlap if the coefficients of the double
            # excitations are small enough
            pass
        elif (abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2 and
              abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
            # If the overlap matrix is close enough to identity, use the
            # <D|D'> overlap for an orthogonal single-particle basis to
            # approximate the overlap for the non-orthogonal basis.
            ovlp += numpy.dot(bra2aa.ravel(), ket2aa.ravel()) * trans_SS[0, 0] * 2
        else:
            from multiprocessing import sharedctypes, Process
            buf_ctypes = sharedctypes.RawArray('d', noovv)
            trans_ket = numpy.ndarray(noovv, buffer=buf_ctypes)

            def trans_dot_ket(i0, i1):
                for i in range(i0, i1):
                    s_sub = s[occlist2[i]].T.copy()
                    minors = s_sub[occlist2]
                    trans_ket[i] = numpy.linalg.det(minors).dot(ket2aa.ravel())

            nproc = lib.num_threads()
            if nproc > 1:
                seg = (noovv + nproc - 1) // nproc
                ps = []
                for i0, i1 in lib.prange(0, noovv, seg):
                    p = Process(target=trans_dot_ket, args=(i0, i1))
                    ps.append(p)
                    p.start()
                [p.join() for p in ps]
            else:
                trans_dot_ket(0, noovv)

            ovlp += numpy.dot(bra2aa.ravel(), trans_ket) * trans_SS[0, 0] * 2

    return ovlp
from coreUtils import *

import multiprocessing as mp
from multiprocessing import sharedctypes as sctypes
import ctypes as c

# Get a fairly large shared array
_Ashared = sctypes.RawArray(c.c_double, 10000000)  # ten million doubles are about 80MB


def slicedInverseWorker(Ain, N, dim, nStart=0, nEnd=None):
    nEnd = nEnd if nEnd is not None else N
    if isinstance(Ain, np.ndarray):
        AinAsNp = Ain
    else:
        if len(Ain) == N * dim * dim:
            AinAsNp = np.frombuffer(Ain)
        else:
            AinAsNp = np.frombuffer(Ain)[:N * dim * dim]
        AinAsNp.resize(N, dim, dim)
    for n in range(nStart, nEnd):
        AinAsNp[n, :, :] = inv(AinAsNp[n, :, :], overwrite_a=True,
                               check_finite=False)
    return 0


#slicedInverseWorkerInherit = lambda N, dim, nStart=0, nEnd=None: \
#    slicedInverseWorker(_Ashared, N, dim, nStart, nEnd)
def slicedInverseWorkerInherit(N, dim, nStart=0, nEnd=None):
    return slicedInverseWorker(_Ashared, N, dim, nStart, nEnd)
def __init__(self, seq, num_workers=0, buffer_size=10, init_fn=None,
             shm_size=0):
    if num_workers <= 0:
        num_workers = multiprocessing.cpu_count() - num_workers
    if num_workers <= 0:
        raise ValueError("at least one worker required")
    if buffer_size < num_workers:
        raise ValueError("at least one buffer slot required by worker")
    if shm_size > 0 and sys.version_info < (3, 8):
        raise NotImplementedError("shm support requires python>=3.8")
    if shm_size > 0 and platform.python_implementation() == "PyPy":
        raise NotImplementedError("shm support broken on PyPy")

    # allocate shared memory for zero-copy transfers from workers
    self.shm = sharedctypes.RawArray('B', shm_size)
    self.shm_slot_size = shm_size // buffer_size
    # shm slots are identified by their byte offset
    if shm_size > 0:
        self.free_shm_slots = {i * self.shm_slot_size
                               for i in range(buffer_size)}
    else:
        self.free_shm_slots = set()

    # initialize workers
    self.job_queue = multiprocessing.Queue()
    self.result_pipes = []
    self.workers = []
    for _ in range(num_workers):
        rx, tx = multiprocessing.Pipe(duplex=False)
        worker = multiprocessing.Process(
            target=self.__class__.worker,
            args=(seq, self.job_queue, self.shm, self.shm_slot_size, tx,
                  init_fn),
            daemon=True)
        old_sig_hdl = signal.signal(signal.SIGINT, signal.SIG_IGN)
        worker.start()
        signal.signal(signal.SIGINT, old_sig_hdl)
        tx.close()
        self.result_pipes.append(rx)
        self.workers.append(worker)

    # monitor workers
    self.worker_died = threading.Event()
    worker_monitor = threading.Thread(target=self.monitor_workers,
                                      args=(self.workers, self.worker_died),
                                      daemon=True)
    worker_monitor.start()

    # set cleanup hooks
    weakref.finalize(self, ProcessBacked.cleanup, self.job_queue,
                     self.workers, worker_monitor)
def slicedInversion(Ain, cpy=True, NnDim=None, returnNp=True):
    NnDim = Ain.shape[:2] if NnDim is None else NnDim
    doParallel = NnDim[0] * NnDim[1] > 1000
    if doParallel:
        if cpy:
            if isinstance(Ain, np.ndarray):
                Ain2 = Ain.copy()
            else:
                try:
                    Ain2 = sctypes.copy(Ain)
                except:
                    Ain2 = sctypes.copy(Ain.get_obj())
        else:
            Ain2 = Ain
        if isinstance(Ain, np.ndarray):
            Ain2 = Ain2.ravel()
        NperSlice = NnDim[1] ** 2
        slicePerLoop = 10000000 // NperSlice
        for k in range(NnDim[0] // slicePerLoop + 1):
            thisN = min(slicePerLoop, NnDim[0] - k * slicePerLoop)
            # copy the data into the shared buffer
            _Ashared[:thisN * NperSlice] = \
                Ain2[k * slicePerLoop * NperSlice:
                     (k * slicePerLoop + thisN) * NperSlice]
            # distribute the work
            indList = np.linspace(0, thisN, 5, dtype=np.int_)
            LL = lmap(lambda i: [thisN, NnDim[1], indList[i], indList[i + 1]],
                      range(4))
            # do the work
            multiProcWorkers.starmap(slicedInverseWorkerInherit, LL)
            # copy back
            Ain2[k * slicePerLoop * NperSlice:
                 (k * slicePerLoop + thisN) * NperSlice] = \
                _Ashared[:thisN * NperSlice]
            if isinstance(Ain, np.ndarray) and (not cpy):
                # some extra work to keep consistency
                Atemp = np.frombuffer(_Ashared)[:thisN * NperSlice]
                Atemp.resize((thisN, NnDim[1], NnDim[1]))
                Ain[k * slicePerLoop:k * slicePerLoop + thisN, :, :] = Atemp
            # done with this loop
    else:
        if cpy:
            try:
                Ain2 = Ain.copy() if isinstance(Ain, np.ndarray) \
                    else sctypes.copy(Ain)
            except AttributeError:
                Ain2 = sctypes.copy(Ain.get_obj())
        else:
            Ain2 = Ain
        slicedInverseWorker(Ain2, NnDim[0], NnDim[1])

    if returnNp and not isinstance(Ain2, np.ndarray):
        try:
            out = np.frombuffer(Ain2)
        except:
            out = np.frombuffer(Ain2.get_obj())
        out.resize((NnDim[0], NnDim[1], NnDim[1]))
    elif returnNp:
        out = Ain2
        out.resize((NnDim[0], NnDim[1], NnDim[1]))
    if not returnNp and isinstance(Ain2, np.ndarray):
        out = sctypes.RawArray(c.c_double, Ain2.size)
        out[:] = Ain2.ravel()
    elif not returnNp:
        out = Ain2
    return out
def test_global_handle(self):
    """
    Test ID: DAO

    Test Description: Use a pool handle in another process.

    :avocado: tags=container,conthandle,vm,small,regression
    """
    try:
        # use the uid/gid of the user running the test, these should
        # be perfectly valid
        createuid = os.geteuid()
        creategid = os.getegid()

        # parameters used in pool create that are in yaml
        createmode = self.params.get("mode", '/run/testparams/createmode/')
        createsetid = self.params.get("setname", '/run/testparams/createset/')
        createsize = self.params.get("size", '/run/testparams/createsize/')

        # initialize a python pool object then create the underlying
        # daos storage
        pool = DaosPool(self.Context)
        pool.create(createmode, createuid, creategid, createsize,
                    createsetid, None)
        pool.connect(1 << 1)

        # create a pool global handle
        iov_len, buf_len, buf = pool.local2global()
        buftype = ctypes.c_byte * buf_len
        c_buf = buftype.from_buffer(buf)
        sct_pool_handle = sharedctypes.RawValue(
            IOV, ctypes.cast(c_buf, ctypes.c_void_p), buf_len, iov_len)

        # create a container
        container = DaosContainer(self.Context)
        container.create(pool.handle)
        container.open()

        # create a container global handle
        iov_len, buf_len, buf = container.local2global()
        buftype = ctypes.c_byte * buf_len
        c_buf = buftype.from_buffer(buf)
        sct_cont_handle = sharedctypes.RawValue(
            IOV, ctypes.cast(c_buf, ctypes.c_void_p), buf_len, iov_len)

        sct_pool_uuid = sharedctypes.RawArray(ctypes.c_byte, pool.uuid)

        # this should work in the future but needs on-line server addition
        #arg_list = (
        #p = Process(target=CheckHandle, args=arg_list)
        #p.start()
        #p.join()

        # for now, verify the global handle in the same process, which is
        # not the intended use case
        CheckHandle(sct_pool_handle, sct_pool_uuid, sct_cont_handle, 0)

    except DaosApiError as e:
        print(e)
        print(traceback.format_exc())
        self.fail("Expecting to pass but test has failed.\n")
def to_shared_memory(array):
    array = np.ctypeslib.as_ctypes(array)
    return sharedctypes.RawArray(array._type_, array)
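# Round-trip sketch (added for illustration): recovering a numpy view
# from the RawArray returned above. from_shared_memory is a hypothetical
# counterpart, not part of the original snippet.
import numpy as np
from multiprocessing import sharedctypes


def from_shared_memory(raw, shape, dtype=np.float64):
    return np.frombuffer(raw, dtype=dtype).reshape(shape)


shared = to_shared_memory(np.zeros((2, 2)))
view = from_shared_memory(shared, (2, 2))
view += 1.0  # modifies the shared buffer in place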