def test_KiB_shared_zeros():
    """test sharedmem.zeros for arrays on the order of 2**16, single axis types"""
    for typestr in numtypes:
        shape = (2**16, )
        a = sharedmem.zeros(shape, dtype=typestr)
        t = (a == np.zeros(shape))
        assert t.all()
def test_MiB_shared_zeros():
    """test sharedmem.zeros for arrays on the order of 2**21 bytes, single axis uint8"""
    shape = (2**21, )
    a = sharedmem.zeros(shape, dtype='uint8')
    t = (a == np.zeros(shape))
    assert t.all()
def test_shared_zeros():
    """test sharedmem.zeros for small single axis types"""
    for typestr in numtypes:
        shape = (10, )
        a = sharedmem.zeros(shape, dtype=typestr)
        t = (a == np.zeros(shape))
        assert t.all()
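The three tests above reference `numtypes`, `sharedmem`, and `np` without showing their definitions. A minimal harness sketch that would make them runnable; the exact contents of `numtypes` are an assumption, since the excerpt does not show the original list:

# Minimal test harness sketch (assumed): the tests above rely on these
# module-level names, which the excerpt does not show.
import numpy as np
import sharedmem

# Assumed selection of dtype strings exercised by the tests; the original
# module may use a different list.
numtypes = ['uint8', 'int16', 'int32', 'int64', 'float32', 'float64']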
def test_two_subprocesses_no_pickle():
    # setup
    shape = (4, )
    a = sharedmem.zeros(shape, dtype='float64')
    print "pid ", os.getpid(), ":", a
    lck = multiprocessing.Lock()

    def modify_array(a, lck):
        # a = pickle.loads(a)
        with lck:
            a[0] = 1
            a[1] = 2
            a[2] = 3
        print "pid ", os.getpid(), "modified array"

    p = multiprocessing.Process(target=modify_array, args=(a, lck))
    p.start()

    # poll for the result -- super inefficient!
    t0 = time.time()
    t1 = t0 + 10
    nn = 0
    while True:
        if a[0]:
            with lck:
                t = (a == np.array([1, 2, 3, 0], dtype='float64'))
            break
        if time.time() > t1:  # use timeout instead
            break
        nn += 1

    # this will raise an exception if we timed out above
    print "pid ", os.getpid(), t
    assert t.all()
    print "finished (from %s)" % os.getpid()
    p.join()
    print a
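The comments above call the polling loop "super inefficient" and suggest a timeout. A minimal sketch of the same handshake using multiprocessing.Event instead of polling, assuming the same sharedmem module as above:

import multiprocessing
import numpy as np
import sharedmem

def modify_array(a, done):
    a[0], a[1], a[2] = 1, 2, 3
    done.set()  # signal the parent instead of making it poll

if __name__ == '__main__':
    a = sharedmem.zeros((4, ), dtype='float64')
    done = multiprocessing.Event()
    p = multiprocessing.Process(target=modify_array, args=(a, done))
    p.start()
    assert done.wait(timeout=10)  # same 10 s budget, no busy loop
    assert (a == np.array([1, 2, 3, 0], dtype='float64')).all()
    p.join()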
def proc(qin, qout):
    print("grabbing array from queue")
    a = qin.get()
    print(a)
    print("putting array in queue")
    b = shm.zeros(10)
    print(b)
    qout.put(b)
    print("waiting for array to be updated by another process")
    a = qin.get()  # blocks until the parent sends the None sentinel
    print(b)
def test_two_subprocesses_with_pickle():
    shape = (4, )
    a = sharedmem.zeros(shape, dtype='float64')
    print "pid ", os.getpid(), ":", a
    pa = pickle.dumps(a)
    lck = multiprocessing.Lock()

    def modify_array(pa, lck):
        a = pickle.loads(pa)
        with lck:
            a[0] = 1
            a[1] = 2
            a[2] = 3
        print "pid ", os.getpid(), "modified array"

    p = multiprocessing.Process(target=modify_array, args=(pa, lck))
    p.start()

    t0 = time.time()
    t1 = t0 + 10
    nn = 0
    while True:
        if a[0]:
            with lck:
                t = (a == np.array([1, 2, 3, 0], dtype='float64'))
            break
        if time.time() > t1:  # use timeout instead
            break
        nn += 1

    print "pid ", os.getpid(), t, "nn:", nn
    assert t.all()
    print "finished (from %s)" % os.getpid()
    p.join()
    print a
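A minimal sketch of the property this test depends on: pickling a sharedmem array is assumed to serialize a handle to the shared segment rather than the data, so the unpickled array aliases the same memory. This is an assumption about the sharedmem module used above, consistent with the test passing:

import pickle
import sharedmem

a = sharedmem.zeros((4, ), dtype='float64')
b = pickle.loads(pickle.dumps(a))  # assumed to attach to the same shared segment
b[0] = 42.0
assert a[0] == 42.0  # both arrays view one shared buffer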
def parallelUpdateUV(self, X, U, V, muU, muV, numBlocks, rowBlockSize,
                     colBlockSize, rowIsFree, colIsFree, indPtr, colInds,
                     lock, gi, gp, gq, normGp, normGq, iterationsPerBlock,
                     loopInd):
    m, n = X.shape
    gradientsPerBlock = sharedmem.zeros((numBlocks, numBlocks))

    # Set up order of indices for stochastic methods
    permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
    permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

    for i in range(numBlocks):
        for j in range(numBlocks):
            gradientsPerBlock[i, j] = numpy.ceil(float(max(m, n)) / (numBlocks**2))

    assert gradientsPerBlock.sum() >= max(m, n)
    #print(gradientsPerBlock.sum())

    # Compute omega for each col block
    omegasList = []
    for i in range(numBlocks):
        blockColInds = permutedColInds[i * colBlockSize:(i + 1) * colBlockSize]
        omegasList.append(restrictOmega(indPtr, colInds, blockColInds))

    processList = []

    if self.numProcesses != 1:
        for i in range(self.numProcesses):
            learner = self.copy()
            learner.learnerCython = self.getCythonLearner()
            sharedArgs = (rowIsFree, colIsFree, iterationsPerBlock,
                          gradientsPerBlock, U, V, muU, muV, lock)
            methodArgs = (learner, rowBlockSize, colBlockSize, indPtr, colInds,
                          permutedRowInds, permutedColInds, gi, gp, gq,
                          normGp, normGq, i, loopInd, omegasList)
            process = multiprocessing.Process(target=updateUVBlock,
                                              args=(sharedArgs, methodArgs))
            process.start()
            processList.append(process)

        for process in processList:
            process.join()
    else:
        learner = self.copy()
        learner.learnerCython = self.getCythonLearner()
        sharedArgs = (rowIsFree, colIsFree, iterationsPerBlock,
                      gradientsPerBlock, U, V, muU, muV, lock)
        methodArgs = (learner, rowBlockSize, colBlockSize, indPtr, colInds,
                      permutedRowInds, permutedColInds, gi, gp, gq,
                      normGp, normGq, 0, loopInd, omegasList)
        updateUVBlock(sharedArgs, methodArgs)
def __init__(self, servo_horizontal, m_horizontal, b_horizontal,
             servo_vertical, m_vertical, b_vertical, f, speed,
             pwm_min, pwm_max):
    # Initialize multiprocessing.Process parent
    multiprocessing.Process.__init__(self)

    # Exit event for stopping the process
    self._exit = multiprocessing.Event()

    # Set servo pins
    self._s_h = servo_horizontal
    self._s_v = servo_vertical

    # Set slopes
    self._m_horizontal = m_horizontal
    self._m_vertical = m_vertical

    # Set intercepts
    self._b_horizontal = b_horizontal
    self._b_vertical = b_vertical

    # Set update frequency
    self._f = 1.0 / f

    # Set increment per time step
    self._increment = speed / f

    # Set minimum and maximum pwm ranges
    self._pwm_min = pwm_min
    self._pwm_max = pwm_max

    # An array in shared memory for storing the new desired servo position angles
    self._newangles = sharedmem.zeros((2, 1), dtype='float')

    # An array in shared memory holding the current servo angles
    self._currentangles = sharedmem.ones((2, 1), dtype='float')

    # Initialize ServoBlaster device
    self._servoblaster = open('/dev/servoblaster', 'w')
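A hypothetical construction sketch for this Process subclass; the class name `ServoProcess` and every numeric value are placeholders, not taken from the original project. The point it illustrates is that other processes steer the servos by writing into the shared `_newangles` array:

# Hypothetical usage sketch; ServoProcess and all values are assumptions.
servo = ServoProcess(servo_horizontal=0, m_horizontal=1.0, b_horizontal=0.0,
                     servo_vertical=1, m_vertical=1.0, b_vertical=0.0,
                     f=50.0, speed=30.0, pwm_min=50, pwm_max=250)
servo.start()
# Writing to the shared array is immediately visible inside the child process
servo._newangles[:] = [[45.0], [10.0]]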
def GetBRDF(BRDF_file, NumberOfBands=7, NumberOfParameters=3):
    # Check that the file exists
    File = glob.glob(BRDF_file)

    if len(File) == 1:
        # Get raster size
        dataset = gdal.Open(BRDF_file, GA_ReadOnly)
        rows, cols, Bands = GetDimensions(BRDF_file)
    else:
        # File does not exist; only create empty arrays with the nominal tile size
        rows = 2400
        cols = 2400

    # Get BRDF parameters
    f0 = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)
    f1 = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)
    f2 = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)

    f0_var = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)
    f1_var = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)
    f2_var = shm.zeros((rows, cols, NumberOfBands), dtype=numpy.float32)

    # Number of weighted samples used for BRDF inversion
    NSamples = shm.zeros((rows, cols), dtype=numpy.float32)

    if len(File) == 1:
        # Fill BRDF parameter arrays
        for band in range(NumberOfBands):
            f0[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + 1).ReadAsArray()
            f1[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + 2).ReadAsArray()
            f2[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + 3).ReadAsArray()

            f0_var[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + \
                (NumberOfParameters * NumberOfBands) + 1).ReadAsArray()
            f1_var[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + \
                (NumberOfParameters * NumberOfBands) + 2).ReadAsArray()
            f2_var[:, :, band] = dataset.GetRasterBand((band * NumberOfParameters) + \
                (NumberOfParameters * NumberOfBands) + 3).ReadAsArray()

        NSamples = dataset.GetRasterBand(((NumberOfParameters * NumberOfBands) * 2) + 2).ReadAsArray()

    return ReturnGetBRDF(f0, f1, f2, f0_var, f1_var, f2_var, NSamples)
try:
    video_src = video_src[0]
except:
    video_src = 'synth:bg=../cpp/lena.jpg:noise=0.05'

args = dict(args)
cascade_fn = args.get('--cascade', "haarcascades/haarcascade_frontalface_alt.xml")
cascade = cv2.CascadeClassifier(cascade_fn)
cascade_nested = cv2.CascadeClassifier(cascade_fn)

cam = create_capture(video_src)
# print dir(cv), [x for x in dir(cv) if "exposure" in x.lower()]
# print cam.get(cv.CV_CAP_PROP_EXPOSURE)

ret, img = cam.read()
width = img.shape[0]
height = img.shape[1]

low_image = shm.zeros(img.shape[0:2], dtype=img.dtype)

# controller_thread = threading.Thread(target=controller, args=(controller_state, stopping, controller_cv, the_lock))
# controller_thread = mp.Process(target=controller, args=(controller_state, stopping, controller_cv, the_lock))
# controller_thread.start()
targeter = Targeter(height, width, add_controller_command)

primed = True
firing = False
recognizing = False
locked_counter = 0

with recog.RecognizerManager() as recognizer:
    while True:
        t = clock()
# Create the SVM classifier
print "Training the classifier..."
kernel = 'rbf'
#class_weight = 'auto'
probability = True
cache_size = 200
clf = svm.SVC(kernel=kernel, probability=probability, cache_size=cache_size)
clf.fit(X, y)

# Save the trained model
# To load: clf = joblib.load('MELODIES_SVM_clf.pkl')
joblib.dump(clf, 'MELODIES_SVM_clf.pkl')

# Shared memory output arrays
# Note: this rebinds `probability` from the SVC flag above to an output array
classification = shm.zeros((NIR.shape[1], NIR.shape[2]), dtype=np.int8)
probability = shm.zeros((NIR.shape[1], NIR.shape[2], classes.shape[0]), dtype=np.float32)

Processes = []
NumProcesses = 12  # Number of cores available to do the processing
LineToProcess = iRow

# Run until all the processes are done and there are no pixels left to process
while Processes or LineToProcess < eRow:
    # If we aren't using all the processors AND there are lines left to
    # compute, then spawn another process
    if (len(Processes) < NumProcesses) and LineToProcess < eRow:
        p = Process(target=DoClassification, args=[LineToProcess, iCol, eCol])
NIR_file = sys.argv[1]

try:
    print NIR_file
    NIR_dataset = gdal.Open(NIR_file, GA_ReadOnly)
    rows, cols, BandCount = GetDimensions(NIR_file)
    # Get band names and delete the first entry, since it is the name of the layerstack
    BandNames = NIR_dataset.GetFileList()
    BandNames.remove(BandNames[0])
except:
    print "Error:", sys.exc_info()[0]
    exit(-1)

# Create empty arrays
NIR = numpy.zeros((rows, cols, BandCount), numpy.float32)
InterpolatedNIR = shm.zeros((rows, cols, BandCount), dtype=numpy.float32)

# Populate array
for band in range(BandCount):
    NIR[:, :, band] = NIR_dataset.GetRasterBand(band + 1).ReadAsArray()
NIR_dataset = None

Processes = []
NumProcesses = 12  # Number of cores available to do the processing
LineToProcess = 0

# Run until all the processes are done and there are no pixels left to process
while Processes or LineToProcess < rows:
    # If we aren't using all the processors AND there are interpolations left to
    # compute, then spawn another process
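Several of these excerpts cut off inside the same hand-rolled process pool loop. A sketch of how such a loop is typically completed, following the names used above; `InterpolateLine` is a hypothetical worker target, since the excerpt does not show the real one, and it assumes `from multiprocessing import Process` and `import time`:

while Processes or LineToProcess < rows:
    if (len(Processes) < NumProcesses) and LineToProcess < rows:
        # Spawn another worker on the next line of the image
        p = Process(target=InterpolateLine, args=[LineToProcess])  # hypothetical target
        p.daemon = True
        p.start()
        Processes.append(p)
        LineToProcess += 1
    else:
        # All cores busy (or no work left): reap finished workers
        Processes = [p for p in Processes if p.is_alive()]
        time.sleep(0.1)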
def make_ea(ev):
    return sharedmem.zeros((repeats + 1, len(ev)), dtype=float)
def bootstrap(fit, xdata, ydata, CI, shuffle_method=bootstrap_residuals,
              shuffle_args=(), shuffle_kwrds={}, repeats=3000,
              eval_points=None, full_results=False, nb_workers=None,
              extra_attrs=(), fit_args=(), fit_kwrds={}):
    """
    This function implements the bootstrap algorithm for a regression
    algorithm. It is capable of spreading the load across many processes
    using shared memory and the :py:mod:`multiprocessing` module.

    :type fit: callable
    :param fit: Method used to compute the regression. The call is::

            f = fit(xdata, ydata, *fit_args, **fit_kwrds)

        Fit should return an object that evaluates the regression on a set
        of points. The next call will be::

            f(eval_points)

    :type xdata: ndarray of shape (N,) or (k,N) for a function with k predictors
    :param xdata: The independent variable where the data is measured

    :type ydata: ndarray
    :param ydata: The dependent data

    :type CI: tuple of float
    :param CI: List of percentiles to extract

    :type shuffle_method: callable
    :param shuffle_method: Creates the shuffled datasets. The call is::

            shuffle_method(xdata, ydata, y_est, repeat=repeats,
                           *shuffle_args, **shuffle_kwrds)

        where ``y_est`` is the estimated dependent variable on the xdata.

    :type shuffle_args: tuple
    :param shuffle_args: List of arguments for the shuffle method

    :type shuffle_kwrds: dict
    :param shuffle_kwrds: Dictionary of arguments for the shuffle method

    :type repeats: int
    :param repeats: Number of repeats for the bootstrapping

    :type eval_points: ndarray or None
    :param eval_points: List of points to evaluate. If None, eval_points is xdata.

    :type full_results: bool
    :param full_results: If True, also output the whole set of evaluations

    :type nb_workers: int or None
    :param nb_workers: Number of worker processes. If None, the number of
        detected CPUs will be used. If 1 or less, a single process will be used.

    :type extra_attrs: tuple of str
    :param extra_attrs: List of attributes of the fitting method to extract,
        on top of the y values, for confidence intervals

    :type fit_args: tuple
    :param fit_args: List of extra arguments for the fit callable

    :type fit_kwrds: dict
    :param fit_kwrds: Dictionary of extra named arguments for the fit callable

    :rtype: :py:class:`BootstrapResult`
    :return: Estimated y on the data, on the evaluation points, the requested
        confidence intervals and, if requested, the shuffled X, Y and the full
        estimated distributions.
""" xdata = np.asarray(xdata) ydata = np.asarray(ydata) y_fit = fit(xdata, ydata, *fit_args, **fit_kwrds) y_fit.fit() shuffled_x, shuffled_y = shuffle_method(y_fit, xdata, ydata, repeats=repeats, *shuffle_args, **shuffle_kwrds) nx = shuffled_x.shape[-2] ny = shuffled_y.shape[0] extra_values = [] for attr in extra_attrs: extra_values.append(getattr(y_fit, attr)) if eval_points is None: eval_points = xdata if nb_workers is None: nb_workers = mp.cpu_count() multiprocess = nb_workers > 1 # Copy everything in shared mem if multiprocess: ra = sharedmem.zeros((repeats + 1, len(eval_points)), dtype=float) result_array = ra.np sx = sharedmem.array(shuffled_x) sy = sharedmem.array(shuffled_y) ep = sharedmem.array(eval_points) def make_ea(ev): return sharedmem.zeros((repeats + 1, len(ev)), dtype=float) eas = [make_ea(ev) for ev in extra_values] extra_arrays = [ea.np for ea in eas] pool = mp.Pool(mp.cpu_count(), bootstrap_workers.initialize_shared, (nx, ny, ra, eas, sx, sy, ep, extra_attrs, fit, fit_args, fit_kwrds)) else: result_array = np.empty((repeats + 1, len(eval_points)), dtype=float) def make_ea(ev): return np.empty((repeats + 1, len(ev)), dtype=float) extra_arrays = [make_ea(ev) for ev in extra_values] bootstrap_workers.initialize(nx, ny, result_array, extra_arrays, shuffled_x, shuffled_y, eval_points, extra_attrs, fit, fit_args, fit_kwrds) result_array[0] = y_fit(eval_points) for ea, ev in izip(extra_arrays, extra_values): ea[0] = ev base_repeat = repeats // nb_workers if base_repeat * nb_workers < repeats: base_repeat += 1 for i in irange(nb_workers): end_repeats = (i + 1) * base_repeat if end_repeats > repeats: end_repeats = repeats if multiprocess: pool.apply_async(bootstrap_workers.bootstrap_result, (i, i * base_repeat, end_repeats)) else: bootstrap_workers.bootstrap_result(i, i * base_repeat, end_repeats) if multiprocess: pool.close() pool.join() CIs = getCIs(CI, result_array, *extra_arrays) # copy the array to not return a view on a larger array y_eval = np.array(result_array[0]) if not full_results: shuffled_y = shuffled_x = result_array = None extra_arrays = () elif multiprocess: result_array = result_array.copy() # copy in local memory extra_arrays = [ea.copy for ea in extra_arrays] return BootstrapResult(y_fit, y_fit(xdata), eval_points, y_eval, tuple(CI), CIs, shuffled_x, shuffled_y, result_array)
import numpy
import sharedmem as shm
import pickle

def modify_array(a):
    # a = pickle.loads(a)
    a[:, :3] = 1
    #print a.shape
    print "modified array in modify_array"
    #shm.cleanup()

from multiprocessing import Pool, Process

if __name__ == '__main__':
    a = shm.zeros((2, 4))
    print "original process:", a
    p = Pool()
    job = p.apply_async(modify_array, (a, ))
    p.close()
    p.join()
    print "reprint a in original process:", a
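For contrast, a sketch of the same round trip with a plain numpy array: the Pool worker receives a pickled copy, so the parent's array is unchanged, which is exactly the behavior the shared array above avoids:

import numpy
from multiprocessing import Pool

def modify_plain(a):
    a[:, :3] = 1  # modifies the worker's private copy only

if __name__ == '__main__':
    a = numpy.zeros((2, 4))
    p = Pool()
    p.apply_async(modify_plain, (a, ))
    p.close()
    p.join()
    print "still all zeros in original process:", a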
# Get only the last 10 years of data
lc_years = numberOfBands / numberOfClasses
lcp = lcp[((lc_years - numberOfYears) * numberOfClasses):]

# Get mask
fname = DataDir + '/' + 'NumberOfAggLC_changes_masked.tif'
mask = gdal.Open(fname).ReadAsArray()

# Default trajectories for all classes/years
classes = np.repeat(np.arange(1, numberOfClasses + 1), numberOfYears) \
            .reshape(numberOfClasses, numberOfYears).T
classesTrajectories = cartesian(classes)

# Output arrays
# LC change probabilities
lcc_prob = shm.zeros((rows, cols, 10), dtype=np.float32)
# LC change trajectories
lcc_traj = shm.zeros((rows, cols, 10), dtype=np.uint64)

# Process only pixels where there was a LC change
print 'Processing only pixels where there was a LC change...'
indices_mask = np.where(mask > 0)
print 'Total number of pixels to process:', indices_mask[0].shape[0]  # approx. 347k pixels

Processes = []
NumProcesses = 8  # Number of cores available to do the processing
PixelToProcess = 0

# Run until all the processes are done and there are no pixels left to process
while Processes or PixelToProcess < indices_mask[0].shape[0]:
print BRDFNoSnowFile
BRDFNoSnow = GetBRDF(BRDFNoSnowFile)

BRDFSnowFile = sys.argv[2]
print BRDFSnowFile
BRDFSnow = GetBRDF(BRDFSnowFile)

OutputDir = sys.argv[3]

# First 7 MODIS bands
rows = 2400
cols = 2400
NumberOfBands = 7
NumberOfParameters = 3

# Output array - reflectance bands, corresponding uncertainties, and snow proportion
BHR = shm.zeros((rows, cols, (NumberOfBands * 2) + 1), dtype=numpy.float32)

Processes = []
NumProcesses = 12  # Number of cores available to do the processing
LineToProcess = 0

ComputeBHR(LineToProcess, cols, NumberOfBands, BRDFNoSnow, BRDFSnow)

while Processes or LineToProcess < rows:
    # If we aren't using all the processors AND there are lines left to
    # compute, then spawn another process
    if (len(Processes) < NumProcesses) and LineToProcess < rows:
        p = Process(target=ComputeBHR,
                    args=[LineToProcess, cols, NumberOfBands, BRDFNoSnow, BRDFSnow])
        p.daemon = True
        p.name = str(LineToProcess)
def parallelLearnModel(self, X, verbose=False, U=None, V=None):
    """
    Max local AUC with Frobenius norm penalty on V. Solve with parallel
    (stochastic) gradient descent. The input is a sparse array.
    """
    # Convert to a csarray for faster access
    if scipy.sparse.issparse(X):
        logging.debug("Converting to csarray")
        X2 = sppy.csarray(X, storagetype="row")
        X = X2

    m, n = X.shape

    # We keep a validation set in order to determine when to stop
    if self.validationUsers != 0:
        numValidationUsers = int(m * self.validationUsers)
        trainX, testX, rowSamples = Sampling.shuffleSplitRows(
            X, 1, self.validationSize, numRows=numValidationUsers)[0]
        testIndPtr, testColInds = SparseUtils.getOmegaListPtr(testX)
    else:
        trainX = X
        testX = None
        rowSamples = None
        testIndPtr, testColInds = None, None

    # Note that to compute the test AUC we pick i \in X and j \notin X \cup testX
    indPtr, colInds = SparseUtils.getOmegaListPtr(trainX)
    allIndPtr, allColInds = SparseUtils.getOmegaListPtr(X)

    if U is None or V is None:
        U, V = self.initUV(trainX)

    if self.metric == "f1":
        metricInd = 2
    elif self.metric == "mrr":
        metricInd = 3
    else:
        raise ValueError("Unknown metric: " + self.metric)

    bestMetric = 0
    bestU = 0
    bestV = 0
    trainMeasures = []
    testMeasures = []
    loopInd = 0
    lastObj = 0
    currentObj = lastObj - 2 * self.eps

    numBlocks = self.numProcesses + 1
    gi, gp, gq = self.computeGipq(X)
    normGp, normGq = self.computeNormGpq(indPtr, colInds, gp, gq, m)

    # Some shared variables
    rowIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool)
    colIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool)

    # Create shared factors
    U2 = sharedmem.zeros((m, self.k))
    V2 = sharedmem.zeros((n, self.k))
    muU2 = sharedmem.zeros((m, self.k))
    muV2 = sharedmem.zeros((n, self.k))

    U2[:] = U[:]
    V2[:] = V[:]
    muU2[:] = U[:]
    muV2[:] = V[:]
    del U, V

    rowBlockSize = int(numpy.ceil(float(m) / numBlocks))
    colBlockSize = int(numpy.ceil(float(n) / numBlocks))
    lock = multiprocessing.Lock()
    startTime = time.time()
    loopInd = 0
    iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))
    self.learnerCython = self.getCythonLearner()
    nextRecord = 0

    while loopInd < self.maxIterations and abs(lastObj - currentObj) > self.eps:
        if loopInd >= nextRecord:
            if loopInd != 0:
                print("")
            printStr = self.recordResults(muU2, muV2, trainMeasures,
                                          testMeasures, loopInd, rowSamples,
                                          indPtr, colInds, testIndPtr,
                                          testColInds, allIndPtr, allColInds,
                                          gi, gp, gq, trainX, startTime)
            logging.debug(printStr)

            if testIndPtr is not None and testMeasures[-1][metricInd] >= bestMetric:
                bestMetric = testMeasures[-1][metricInd]
                bestU = muU2.copy()
                bestV = muV2.copy()
            elif testIndPtr is None:
                bestU = muU2.copy()
                bestV = muV2.copy()

            # Compute objective averaged over the last 5 recorded steps
            trainMeasuresArr = numpy.array(trainMeasures)
            lastObj = currentObj
            currentObj = numpy.mean(trainMeasuresArr[-5:, 0])

            nextRecord += self.recordStep

        iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))
        self.parallelUpdateUV(X, U2, V2, muU2, muV2, numBlocks, rowBlockSize,
                              colBlockSize, rowIsFree, colIsFree, indPtr,
                              colInds, lock, gi, gp, gq, normGp, normGq,
                              iterationsPerBlock, loopInd)
        loopInd += numpy.floor(iterationsPerBlock.mean())

    totalTime = time.time() - startTime

    # Compute quantities for the last U and V
    print("")
    totalTime = time.time() - startTime
    printStr = "Finished, time=" + str('%.1f' % totalTime) + " "
    printStr += self.recordResults(muU2, muV2, trainMeasures, testMeasures,
                                   loopInd, rowSamples, indPtr, colInds,
                                   testIndPtr, testColInds, allIndPtr,
                                   allColInds, gi, gp, gq, trainX, startTime)
    printStr += " delta obj" + "%.3e" % abs(lastObj - currentObj)
    logging.debug(printStr)

    self.U = bestU
    self.V = bestV
    self.gi = gi
    self.gp = gp
    self.gq = gq

    if verbose:
        return (self.U, self.V, numpy.array(trainMeasures),
                numpy.array(testMeasures), loopInd, totalTime)
    else:
        return self.U, self.V
import multiprocessing as mp
import numpy as np
import sharedmem as shm

def worker(q, arr):
    done = False
    while not done:
        cmd = q.get()
        if cmd == 'done':
            done = True
        elif cmd == 'data':
            ## Fake data. In real life, get data from hardware.
            rnd = np.random.randint(100)
            print('rnd={0}'.format(rnd))
            arr[:] = rnd
        q.task_done()

if __name__ == '__main__':
    N = 10
    arr = shm.zeros(N, dtype=np.uint8)
    q = mp.JoinableQueue()
    proc = mp.Process(target=worker, args=[q, arr])
    proc.daemon = True
    proc.start()

    for i in range(3):
        q.put('data')

    # Wait for the computation to finish
    q.join()

    print arr.shape
    print(arr)

    q.put('done')
    proc.join()
if __name__ == "__main__":
    qin = mp.Queue()
    qout = mp.Queue()
    p = mp.Process(target=proc, args=(qin, qout))
    p.start()
    a = shm.zeros(4)
    qin.put(a)
    b = qout.get()
    b[:] = range(10)
    qin.put(None)  # sentinel: lets proc() print the updated array
    p.join()

sturla$ python example.py
grabbing array from queue
[ 0.  0.  0.  0.]
putting array in queue
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
waiting for array to be updated by another process
[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]
        new_e = get_likelihood(theta)  # this will also update expected counts
        converged = round(abs(old_e - new_e), 2) == 0.0
        old_e = new_e
        iterations += 1
elif options.algorithm == "EM-SGD-PARALLEL":
    if options.test_gradient.lower() == "true":
        gradient_check_em()
    else:
        print 'skipping gradient check...'
        print 'populating events per trellis...'
        populate_events_per_trellis()
        print 'done...'
        init_theta = initialize_theta(options.input_weights, feature_index)
        shared_sgd_theta = sharedmem.zeros(np.shape(init_theta))
        shared_sgd_theta += init_theta
        new_e = get_likelihood(shared_sgd_theta)
        exp_new_e = get_likelihood_with_expected_counts(shared_sgd_theta)
        old_e = float('-inf')
        converged = False
        iterations = 0
        ids = range(len(trellis))

        while not converged and iterations < 5:
            eta0 = 1.0
            shared_sum_squared_grad = sharedmem.zeros(np.shape(shared_sgd_theta))
            I = 1.0
            for _ in range(2):
                random.shuffle(ids)
                cpu_count = 1  # multiprocessing.cpu_count()
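The names above (eta0, shared_sum_squared_grad) suggest an AdaGrad-style step on the shared parameters. Purely as an assumed sketch of what the workers presumably apply; compute_gradient and batch_ids are hypothetical stand-ins for the per-batch pieces the excerpt does not show:

# Assumed AdaGrad-style update on the shared parameter vector
grad = compute_gradient(shared_sgd_theta, batch_ids)  # hypothetical helper
shared_sum_squared_grad += grad ** 2
shared_sgd_theta -= (eta0 / np.sqrt(I + shared_sum_squared_grad)) * grad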