def __init__(self):
    """Create the four single-slot, manager-backed queues held by this object.

    Every queue is created with ``maxsize=1``.
    """
    # One manager hosts all four queues: every ``mproc.Manager()`` call
    # starts its own server process, so one manager per queue is wasteful.
    # For single-process debugging, plain ``queue.Queue(1)`` objects can be
    # substituted for the proxies below.
    manager = mproc.Manager()
    self.time_left = manager.Queue(1)
    self.submission_action_plan = manager.Queue(1)
    self.submission_error = manager.Queue(1)
    self.submission_reported_gem_locations = manager.Queue(1)
def upload(self, filename, key, headers=None):
    """Upload ``filename`` to ``key`` as a multipart upload.

    The file is split into chunks of ``globals.s3_multipart_chunk_size``
    bytes (raised to ``globals.s3_multipart_minimum_chunk_size`` when it is
    smaller) and each chunk is handed to ``multipart_upload_worker`` in a
    process pool that has one process per chunk.

    Returns whatever ``complete_upload()`` of the multipart upload returns.

    Raises ``BackendException`` when fewer parts than chunks are present
    after the pool has finished; the multipart upload is cancelled first.
    """
    chunk_size = globals.s3_multipart_chunk_size

    # Check minimum chunk size for S3
    if chunk_size < globals.s3_multipart_minimum_chunk_size:
        log.Warn("Minimum chunk size is %d, but %d specified." % (
            globals.s3_multipart_minimum_chunk_size, chunk_size))
        chunk_size = globals.s3_multipart_minimum_chunk_size

    # Decide in how many chunks to upload: ceiling division, with at least
    # one chunk so that an empty file is still uploaded. Floor division
    # keeps the count an int on Python 3 as well (``/`` would yield a float
    # there, which ``range`` and ``Pool`` reject).
    file_size = os.path.getsize(filename)
    chunks = max(1, (file_size + chunk_size - 1) // chunk_size)

    log.Debug("Uploading %d bytes in %d chunks" % (file_size, chunks))

    mp = self.bucket.initiate_multipart_upload(key, headers)

    # Initiate a queue to share progress data between the pool
    # workers and a consumer thread, that will collect and report
    queue = None
    if globals.progress:
        manager = multiprocessing.Manager()
        queue = manager.Queue()
        consumer = ConsumerThread(queue)
        consumer.start()

    pool = multiprocessing.Pool(processes=chunks)
    for n in range(chunks):
        params = [
            self.scheme, self.parsed_url, self.bucket_name, mp.id,
            filename, n, chunk_size, globals.num_retries, queue,
        ]
        pool.apply_async(multipart_upload_worker, params)
    pool.close()
    pool.join()

    # Terminate the consumer thread, if any
    if globals.progress:
        consumer.finish = True
        consumer.join()

    # The async results are not inspected; success is judged solely by the
    # number of parts the multipart upload reports.
    if len(mp.get_all_parts()) < chunks:
        mp.cancel_upload()
        raise BackendException("Multipart upload failed. Aborted.")

    return mp.complete_upload()
def lambda_multiMaster(start_time, isThreaded, envs, s3Enabled):
    """Run ``executeLambda`` once per entry of ``envs`` in a worker pool.

    Each entry is passed to the worker as a one-item dict ``{aID: e}``.

    * ``isThreaded`` true: the payloads are mapped over a pool sized by
      ``auditMeth.poolThreadNumber`` and the list returned by ``map`` is
      returned. The module-level ``threadEvent`` is set to ``'Event'`` when
      ``s3Enabled`` is true, otherwise to ``None``.
    * ``isThreaded`` false: every payload is submitted with ``apply_async``
      together with a manager queue. The return value is the *last*
      ``AsyncResult`` submitted (kept for backward compatibility), or
      ``None`` when ``envs`` is empty.

    ``start_time`` is only used to print the elapsed time in process mode.
    """
    global threadEvent

    if isThreaded:
        threadEvent = 'Event' if s3Enabled else None
        print('is THREADED mode')
        pools = auditMeth.poolThreadNumber(len(envs), mp.cpu_count())
        pool = mp.Pool(1 if pools == 0 else pools)
        try:
            return pool.map(executeLambda,
                            ({aID: e} for aID, e in envs.items()))
        finally:
            # ``map`` has already returned or raised here; release the
            # workers instead of leaking the pool.
            pool.close()
            pool.join()

    print('is multi PROCESSOR mode')
    pool = mp.Pool()
    manager = mp.Manager()
    q = manager.Queue()
    results = None  # stays None when there is nothing to submit
    for aID, e in envs.items():
        pload = {aID: e}
        print(pload)
        results = pool.apply_async(executeLambda, (pload, q))
    pool.close()
    pool.join()

    # Drain what the workers put on the queue. The aggregate is not used
    # yet: the call that would write it out is disabled in the original
    # (lambda_writeResult(upload2S3, pyObj, Main_bucket, sumoName)).
    objs = []
    pyObj = {}
    while not q.empty():
        newobjs, newPyObj = q.get()
        objs.extend(newobjs)
        pyObj.update(newPyObj)

    print("--- %s seconds B---" % (time.time() - start_time))
    return results
def _identity(x):
    """Return ``x`` unchanged (a picklable stand-in for ``lambda x: x``)."""
    return x


def overlapaddparallel(Amat, Hmat, L=None, Nfft=None, y=None, verbose=False,
                       logger=None, state_setter=None, base_state="",
                       path=None):
    """
    Fast two-dimensional linear convolution via the overlap-add method.

    The overlap-add method is well-suited to convolving a very large array,
    `Amat`, with a much smaller filter array, `Hmat` by breaking the large
    convolution into many smaller `L`-sized sub-convolutions, and evaluating
    these using the FFT. The computational savings over the straightforward
    two-dimensional convolution via, say, scipy.signal.convolve2d, can be
    substantial for large Amat and/or Hmat.

    Every sub-convolution is submitted to a ``multiprocessing.Pool`` as a
    call to ``computation``.

    Parameters
    ----------
    Amat, Hmat : array_like
        Two-dimensional input arrays to be convolved. For computational
        purposes, Amat should be larger.
    L : sequence of two ints, optional
        Length of sub-convolution to use. This should ideally be as large as
        possible to obtain maximum speedup: that is, if you have the memory to
        compute the linear convolution using FFT2, i.e., replacing spatial
        convolution with frequency-domain multiplication, then let
        `L = np.array(Amat.shape) + np.array(Hmat.shape) - 1`. Usually, though,
        you are considering overlap-add because you can't afford a batch
        transform on your arrays, so make `L` as big as you can.
        Defaults to ``100 * np.array(Hmat.shape)``.
    Nfft : sequence of two ints, optional
        Size of the two-dimensional FFT to use for each `L`-sized
        sub-convolution. If omitted, defaults to the next power-of-two, that
        is, the next power-of-two on or after `L + np.array(Hmat.shape) - 1`.
        If you choose this default, try to avoid `L` such that this minimum,
        `L + np.array(Hmat.shape) - 1`, is just a bit greater than a
        power-of-two, since you'll be paying for a 4x larger FFT than you need.
    y : array_like, optional
        Storage for the output. Useful if using a memory-mapped file, e.g.
        It is passed through to ``computation`` unchanged.
    verbose : boolean, optional
        If True, progress is reported for each `L`-sized subconvolution
        through `logger` and `state_setter` (whichever is given).
    logger : object with an ``info`` method, optional
        Receives the progress messages. May be None.
    state_setter : callable, optional
        Called with ``base_state + " <percent> done"`` after each finished
        box when `verbose` is True.
    base_state : str, optional
        Prefix of the string handed to `state_setter`.
    path : str, optional
        Passed through to ``computation``; defaults to ``os.getcwd()``.

    Returns
    -------
    None
        The result is not returned here. `y`, the output shape and `path`
        are handed to ``computation`` for every box -- presumably that is
        where the output is stored (TODO confirm against ``computation``).

    Raises
    ------
    ValueError if `L` and `Nfft` aren't two-element, and too small: both
    elements of `L` must be greater than zero, and `Nfft`'s must be greater
    than `L + np.array(Hmat.shape) - 1`. Also if `Amat` or `Hmat` have more
    than two dimensions.

    References
    ----------
    Wikipedia is only semi-unhelpful on this topic: see "Overlap-add method".
    """
    M = np.array(Hmat.shape)
    Na = np.array(Amat.shape)
    ys = (Amat.shape[0] + Hmat.shape[0] - 1,
          Amat.shape[1] + Hmat.shape[1] - 1)

    if path is None:
        path = os.getcwd()

    if L is None:
        L = M * 100
    else:
        L = np.array(L)

    if Nfft is None:
        Nfft = 2 ** np.ceil(np.log2(L + M - 1)).astype(int)
    else:
        Nfft = np.array(Nfft, dtype=int)

    if not (np.all(L > 0) and L.size == 2):
        raise ValueError('L must have two positive elements')
    if not (np.all(Nfft >= L + M - 1) and Nfft.size == 2):
        raise ValueError('Nfft must have two elements >= L + M - 1 where M = Hmat.shape')
    if not (Amat.ndim <= 2 and Hmat.ndim <= 2):
        raise ValueError('Amat and Hmat must be 2D arrays')

    Hf = fft2(Hmat, Nfft)

    pool = multiprocessing.Pool()
    m = multiprocessing.Manager()
    lock = m.Lock()
    print_lock = m.Lock()
    results = []
    # ``logger`` defaults to None, so it must be checked before use.
    if logger is not None:
        logger.info("Starting job server with {} workers".format(pool._processes))

    (XDIM, YDIM) = (1, 0)
    # ``adjust`` is shipped to the workers, so it has to be picklable; a
    # lambda is not, hence the module-level identity function.
    adjust = _identity                                 # no adjuster
    if np.isrealobj(Amat) and np.isrealobj(Hmat):      # unless inputs are real
        adjust = np.real                               # then ensure real
    start = [0, 0]
    endd = [0, 0]
    total_boxes = (Na[XDIM] // L[XDIM] + 1) * (Na[YDIM] // L[YDIM] + 1)
    percent_done = Percenter(total_boxes)

    def closing_log(pos):
        # Pool callback: receives whatever ``computation`` returned.
        with print_lock:  # released even if a logger/state_setter call raises
            if verbose and logger is not None:
                logger.info("Finishing box {}".format(pos))
            if verbose and state_setter is not None:
                percent_done.incr()
                state_setter(base_state + " {} done".format(percent_done.percent))

    while start[XDIM] <= Na[XDIM]:
        endd[XDIM] = min(start[XDIM] + L[XDIM], Na[XDIM])
        start[YDIM] = 0
        while start[YDIM] <= Na[YDIM]:
            if verbose and logger is not None:
                logger.info("Starting box {}".format(start))
            endd[YDIM] = min(start[YDIM] + L[YDIM], Na[YDIM])
            thisend = np.minimum(Na + M - 1, start + Nfft)
            pos = (start[YDIM], endd[YDIM], start[XDIM], endd[XDIM],
                   thisend[YDIM], thisend[XDIM])
            # Copy the current box of ``Amat`` into a fresh array before
            # handing it to the worker.
            sub_arr = np.empty_like(Amat[start[YDIM]:endd[YDIM], start[XDIM]:endd[XDIM]])
            sub_arr[:, :] = Amat[start[YDIM]:endd[YDIM], start[XDIM]:endd[XDIM]]
            res = pool.apply_async(
                computation,
                args=(sub_arr, Hf, pos, Nfft, y, ys, adjust, lock, path),
                callback=closing_log)
            results.append(res)
            start[YDIM] += L[YDIM]
        start[XDIM] += L[XDIM]
    pool.close()
    pool.join()
def ec2Item(self, current, aconnect, itemlist):
    """Build the EC2 listing for ``itemlist``.

    ``ec2Define`` is run for the units in one of three ways:

    * threaded (``aconnect.getMultiThread()`` true and
      ``auditMeth.poolThreadNumber`` > 0): every unit is mapped over a pool
      of that size;
    * threaded but the computed pool size is not positive: the units are
      processed inline, without a pool;
    * not threaded: matching units are submitted with ``apply_async`` and
      the results are read back from a manager queue.

    In the two non-map modes a unit is only processed when ``str(env)`` is
    contained in its ``'Name'`` or ``aconnect._useAccounts`` is true.

    Returns ``(lfound, objs, rows)``: the names handled, the header row (when
    ``itemlist`` is not empty) followed by the collected ``newobjs`` entries,
    and the collected rows.
    """
    printColor(['_____LISTING EC2 [] now....in .%s' % (aconnect._region)])
    threaded = aconnect.getMultiThread()
    nEnv = aconnect._env
    totalItems = len(itemlist)
    objs = []
    lfound = []
    rows = []

    pool = None
    q = None
    if threaded:
        pools = auditMeth.poolThreadNumber(totalItems, mp.cpu_count())
        if pools > 0:
            pool = mp.Pool(pools)
        else:
            # No worker count available: fall back to inline processing.
            threaded = False
    else:
        pool = mp.Pool()
        m = mp.Manager()
        q = m.Queue()

    if totalItems > 0:
        objs.append([
            'Name[%s]' % (current.svc), 'Audit', 'Owner', 'type', 'Platform',
            'SecurityGroups', 'Instance', 'Zone', 'VPC', 'State', 'cpu_week',
            'cost', 'spot'
        ])

    results = []
    try:
        if threaded:
            lfound = [unit['Name'] for unit in itemlist]
            results = pool.map(ec2Define, itemlist)
        else:
            for unit in itemlist:
                name = unit['Name']
                if str(nEnv) in name or aconnect._useAccounts:
                    if pool is None:
                        # Inline mode: keep the result instead of dropping it.
                        results.append(ec2Define(unit, name))
                    else:
                        pool.apply_async(ec2Define, (unit, name, q))
                    lfound.append(name)
    finally:
        if pool is not None:
            pool.close()
            pool.join()

    if threaded:
        print(results)

    # Async mode: the workers report through the queue; all of them have
    # finished by now because the pool has been joined.
    if q is not None:
        while not q.empty():
            results.append(q.get())

    for newobjs, row in results:
        objs.extend(newobjs)
        rows.append(row)
    return (lfound, objs, rows)
def upload(self, filename, key, headers=None):
    """Upload ``filename`` as a multipart upload using the shared worker pool.

    The file is split into chunks of ``globals.s3_multipart_chunk_size``
    bytes (raised to ``globals.s3_multipart_minimum_chunk_size`` when it is
    smaller); every chunk becomes one ``multipart_upload_worker`` task on
    ``self._pool``. Each task is given up to
    ``globals.s3_multipart_max_timeout`` to finish. On the first task that
    times out or fails, the pool is rebuilt with ``self._setup_pool()`` and
    waiting stops.

    Returns whatever ``complete_upload()`` of the multipart upload returns.

    Raises ``BackendException`` when tasks are left over or fewer parts than
    chunks are present; the multipart upload is cancelled first.
    """
    import boto

    chunk_size = globals.s3_multipart_chunk_size

    # Check minimum chunk size for S3
    if chunk_size < globals.s3_multipart_minimum_chunk_size:
        log.Warn("Minimum chunk size is %d, but %d specified." % (
            globals.s3_multipart_minimum_chunk_size, chunk_size))
        chunk_size = globals.s3_multipart_minimum_chunk_size

    # Decide in how many chunks to upload: ceiling division, with at least
    # one chunk so that an empty file is still uploaded. Floor division
    # keeps the count an int on Python 3 as well (``/`` would yield a float
    # there, which ``range`` rejects).
    file_size = os.path.getsize(filename)
    chunks = max(1, (file_size + chunk_size - 1) // chunk_size)

    log.Debug("Uploading %d bytes in %d chunks" % (file_size, chunks))

    mp = self.bucket.initiate_multipart_upload(
        key.key, headers, encrypt_key=globals.s3_use_sse)

    # Initiate a queue to share progress data between the pool
    # workers and a consumer thread, that will collect and report
    queue = None
    if globals.progress:
        manager = multiprocessing.Manager()
        queue = manager.Queue()
        consumer = ConsumerThread(queue)
        consumer.start()

    tasks = []
    for n in range(chunks):
        storage_uri = boto.storage_uri(self.boto_uri_str)
        params = [
            self.scheme, self.parsed_url, storage_uri, self.bucket_name,
            mp.id, filename, n, chunk_size, globals.num_retries, queue,
        ]
        tasks.append(
            self._pool.apply_async(multipart_upload_worker, params))

    log.Debug("Waiting for the pool to finish processing %s tasks" %
              len(tasks))
    while tasks:
        head = tasks[0]
        # ``wait`` returns silently on timeout; ``ready()`` tells us which
        # of the two happened.
        head.wait(timeout=globals.s3_multipart_max_timeout)
        if not head.ready():
            log.Debug(
                "%s tasks did not finish by the specified timeout, aborting multipart upload and resetting pool."
                % len(tasks))
            self._setup_pool()
            break
        if not head.successful():
            log.Debug(
                "Part upload not successful, aborting multipart upload."
            )
            self._setup_pool()
            break
        del tasks[0]

    log.Debug("Done waiting for the pool to finish processing")

    # Terminate the consumer thread, if any
    if globals.progress:
        consumer.finish = True
        consumer.join()

    # Anything still in ``tasks`` means the loop above bailed out early.
    if len(tasks) > 0 or len(mp.get_all_parts()) < chunks:
        mp.cancel_upload()
        raise BackendException("Multipart upload failed. Aborted.")

    return mp.complete_upload()
def __init__(self, parent=None):
    """Set up a manager-backed boolean flag and remember ``parent``.

    ``self.val`` is a ``c_bool`` value proxy that starts out as ``False``.
    """
    self.val = mp.Manager().Value(c_bool, False)
    self._parent = parent
def rdsItem(self, current, aconnect, itemlist):
    """Build the RDS listing for ``itemlist``.

    * Threaded (``aconnect.getMultiThread()`` true): every unit is mapped
      over a pool sized by ``auditMeth.poolThreadNumber`` (one worker when
      that returns 0) through ``rdsDefine``.
    * Not threaded: units whose ``'DBInstanceIdentifier'`` contains the
      environment name (or all units when ``aconnect._useAccounts`` is true)
      are submitted with ``apply_async``; the results are read back from a
      manager queue.

    Returns ``(lfound, objs, rows)``: the identifiers handled, the header row
    (when ``itemlist`` is not empty) followed by the collected ``newobjs``
    entries, and the collected rows.
    """
    printColor(['_____LISTING RDS [] now....in .%s' % (aconnect._region)])
    threaded = aconnect.getMultiThread()
    nEnv = aconnect._env
    totalItems = len(itemlist)
    lfound = []
    objs = []
    rows = []

    q = None
    if threaded:
        pools = auditMeth.poolThreadNumber(totalItems, mp.cpu_count())
        p = mp.Pool(1 if pools == 0 else pools)
    else:
        p = mp.Pool()
        m = mp.Manager()
        q = m.Queue()

    if totalItems > 0:
        objs.append([
            'Name[%s]' % (current.svc), 'Audit', 'Owner', 'Engine',
            'Size (GB)', 'Instance', 'MutliAZ', 'VPC', 'last_Modified',
            'connections', 'cost'
        ])

    results = []
    try:
        if threaded:
            lfound = [unit['DBInstanceIdentifier'] for unit in itemlist]
            results = p.map(rdsDefine, itemlist)
        else:
            for unit in itemlist:
                name = unit['DBInstanceIdentifier']
                if nEnv in name or aconnect._useAccounts:
                    p.apply_async(rdsDefine, (unit, name, q))
                    lfound.append(name)
    finally:
        # Always release the workers, including the single-worker pool that
        # is created when the computed pool size is 0.
        p.close()
        p.join()

    # Async mode: the workers report through the queue; all of them have
    # finished by now because the pool has been joined.
    if q is not None:
        while not q.empty():
            results.append(q.get())

    for newobjs, row in results:
        objs.extend(newobjs)
        rows.append(row)
    print(objs)
    return (lfound, objs, rows)
def dynamoItem(self, current, aconnect, itemlist):
    """Build the DynamoDB listing for the table names in ``itemlist``.

    Each table is described with ``describe_table`` and handed to
    ``dynamoDefine`` in one of three ways:

    * threaded (``aconnect.getMultiThread()`` true and
      ``auditMeth.poolThreadNumber`` > 0): every table is mapped over a pool
      of that size;
    * threaded but the computed pool size is not positive: the tables are
      processed inline, without a pool;
    * not threaded: matching tables are submitted with ``apply_async`` and
      the results are read back from a manager queue.

    In the two non-map modes a table is only processed when the environment
    name is contained in its name or ``aconnect._useAccounts`` is true.

    Returns ``(lfound, objs, rows)``: the table names handled, the header row
    (when ``itemlist`` is not empty) followed by the collected ``newobjs``
    entries, and the collected rows.
    """
    printColor(
        ['_____LISTING DynamoDB [] now....in .%s' % (aconnect._region)])
    threaded = aconnect.getMultiThread()
    nEnv = aconnect._env
    totalItems = len(itemlist)
    lfound = []
    objs = []
    rows = []

    pool = None
    q = None
    if threaded:
        pools = auditMeth.poolThreadNumber(totalItems, mp.cpu_count())
        if pools > 0:
            pool = mp.Pool(pools)
        else:
            # No worker count available: fall back to inline processing.
            threaded = False
    else:
        pool = mp.Pool()
        m = mp.Manager()
        q = m.Queue()

    if totalItems > 0:
        objs.append([
            'Name[%s]' % (current.svc), 'Audit', 'Owner', 'Status',
            'PartitionKey', 'indexes', 'totalRead', 'totalWrite'
        ])

    client = aconnect.__get_client__('dynamodb')
    results = []
    try:
        if threaded:
            lfound = list(itemlist)
            results = pool.map(
                dynamoDefine,
                (client.describe_table(TableName=name)['Table']
                 for name in itemlist))
        else:
            for name in itemlist:
                if nEnv in name or aconnect._useAccounts:
                    # Describe only the tables that are actually processed.
                    unit = client.describe_table(TableName=name)['Table']
                    if pool is None:
                        # Inline mode: there is no queue, so None is passed
                        # in its place -- assumes dynamoDefine accepts that;
                        # TODO confirm against dynamoDefine.
                        results.append(dynamoDefine(unit, name, None))
                    else:
                        pool.apply_async(dynamoDefine, (unit, name, q))
                    lfound.append(name)
    finally:
        if pool is not None:
            pool.close()
            pool.join()

    # Async mode: the workers report through the queue; all of them have
    # finished by now because the pool has been joined.
    if q is not None:
        while not q.empty():
            results.append(q.get())

    for newobjs, row in results:
        objs.extend(newobjs)
        rows.append(row)
    return (lfound, objs, rows)
def test_pipe_write(a, lock): ## pipe is used between two ## ## queue is used among them ## pass def test_pipe_read(lock): pass if __name__ == '__main__': time_begin = ctime() Thread_pool = dummy.Pool(10) queue = dummy.Manager().Queue() dic = dummy.Manager().dict() list = dummy.Manager().list() print 'current thread : ', dummy.current_process print 'current process : ', multiprocessing.current_process lock = dummy.Lock() for i in xrange(7): Thread_pool.apply_async(test_queue_write, args=(i, lock)) Thread_pool.apply_async(test_queue_read, args=(lock, )) Thread_pool.apply_async(test_dict_write, args=(i, lock)) Thread_pool.apply_async(test_dict_read, args=(lock, )) Thread_pool.apply_async(test_list_write, args=(i, lock)) Thread_pool.apply_async(test_list_read, args=(lock, )) Thread_pool.close() Thread_pool.join() print 'queue as following:(here queue had been empty, because queue.get above)'
# NOTE(review): the next line looks like the tail of a function whose ``def``
# lies outside this view. It is kept exactly as found, which means ``return 1``
# is still part of the trailing comment; restore the real layout once the
# enclosing function is visible.
sleep(2)  ## keep the slow part outside the lock, otherwise running in parallel is pointless ## return 1


def callback_cost_longer(a):
    """Deliberately slow pool callback: churn on ``a`` a million times.

    Nothing is returned; the arithmetic exists only to take time.
    """
    for _ in xrange(1000000):
        a = ((a ** 2) ** 3) ** 4 % 4


if __name__ == '__main__':
    time_begin = ctime()
    pool = multiprocessing.Pool(processes=3)
    dic = multiprocessing.Manager().dict()
    # A manager lock is used here (a plain multiprocessing.Lock() was tried
    # before and is left disabled).
    lock = multiprocessing.Manager().Lock()
    Num = []

    # Earlier experiments, all disabled: test1(msg), test2(msg, dic),
    # test3(msg, dic) without callback, and test3 with callback=Num.append.
    for i in xrange(5):
        msg = 'hello{}'.format(i)
        pool.apply_async(test3, (msg, dic), callback=callback_cost_longer)

    print('== mark ==')

    pool.close()  # after close() no new tasks can be added to the pool
    pool.join()

    for k, v in dic.items():
        print('%s %s' % (k, v))
with lock: filepath = './result.callback' handle = open(filepath, 'a') print ctime(), 'save path is :', filepath for k, v in dic.items(): handle.write( json.dumps(k, ensure_ascii=False) + '\t' + str(v) + '\t' + ctime() + '\n') handle.close() if __name__ == '__main__': time_begin = ctime() pool = multiprocessing.Pool(10) lock = multiprocessing.Lock() dic = multiprocessing.Manager().dict() rootDir = './20170614' for root, dirs, files in os.walk(rootDir): for filespath in files: filename = os.path.join(root, filespath) print 'input filename :', filename pool.apply_async(check_file, args=(filename, lock)) pool.apply_async(save_file, args=(lock, )) print '=== acitve process === ' p_list = multiprocessing.active_children() for p in p_list: print p, p.name, p.daemon pool.close() pool.join() time_end = ctime() print 'result dict here is :'