def upload(root_path, bucket, aws_id, aws_key): max_workers = 20 q = LifoQueue(maxsize=5000) for i in range(max_workers): print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, bucket, root_path, aws_id, aws_key) t.daemon = True t.start() total = 0 # https://docs.python.org/2/library/os.html for root, dirs, files in os.walk(root_path): for name in files: relative = root.split(root_path + os.sep)[1] path = os.path.join(relative, name) #print 'Adding %s to the queue' % path q.put(path) total += 1 while q.qsize() > (q.maxsize - max_workers): time.sleep(10) # sleep if our queue is getting too big for the next set of keys print 'Waiting for queue to be completed' q.join() print 'Done'
def copy_job(self, max_keys=1000): logging.info( 'start copy_bucket' ) src = self.job['source'] tgt = self.job['target'] conn = self.get_conn( tgt['owner'] ) srcBucket = conn.get_bucket( src['bucket'] ) tgtBucket = conn.get_bucket( tgt['bucket'] ) if self.job['options']['allow-acl-change']: ownerBucketView = self.get_conn( src['owner'] ).get_bucket( src['bucket'] ) ownerID = self.users[ tgt['owner'] ]['canonical-id'] else: ownerBucketView = None ownerID = None resultMarker = '' q = LifoQueue(maxsize=5000) for i in range(self.parallel): logging.info( 'adding worker %d' % i ) t = BucketCopyWorker(q, srcBucket, tgtBucket, src['key-prefix'], tgt['key-prefix'], ownerBucketView, ownerID) t.daemon = True t.start() while True: logging.info( 'fetch next 1000, backlog currently at %i' % q.qsize() ) keys = srcBucket.get_all_keys( prefix=src['key-prefix'], max_keys=max_keys, marker = resultMarker) for k in keys: q.put(k.key) if len(keys) < max_keys: print 'Done' break resultMarker = keys[maxKeys - 1].key q.join() logging.info( 'done copy_bucket' )
def copyBucket(maxKeys=1000): print 'start' conn = S3Connection(aws_key, aws_secret_key) srcBucket = conn.get_bucket(srcBucketName) resultMarker = '' q = LifoQueue(maxsize=5000) for i in range(10): print 'adding worker' t = Worker(q) t.daemon = True t.start() while True: print 'fetch next 1000, backlog currently at %i' % q.qsize() keys = srcBucket.get_all_keys(max_keys=maxKeys, marker=resultMarker) for k in keys: q.put(k.key) if len(keys) < maxKeys: print 'Done' break resultMarker = keys[maxKeys - 1].key q.join() print 'done'
def copyBucket(maxKeys=1000): print 'start' s_conn = S3Connection(source_aws_key, source_aws_secret_key) srcBucket = s_conn.get_bucket(srcBucketName) resultMarker = '' q = LifoQueue(maxsize=5000) for i in range(10): print 'adding worker' t = Worker(q) t.daemon = True t.start() while True: print 'fetch next 1000, backlog currently at %i' % q.qsize() keys = srcBucket.get_all_keys(max_keys=maxKeys, marker=resultMarker) for k in keys: q.put(k.key) if len(keys) < maxKeys: print 'Done' break resultMarker = keys[maxKeys - 1].key q.join() print 'done'
def copy_bucket(aws_key, aws_secret_key, src_bucket_name, dst_bucket_name): print print 'Start copy of %s to %s' % (src_bucket_name, dst_bucket_name) print max_keys = 1000 conn = S3Connection(aws_key, aws_secret_key) srcBucket = conn.get_bucket(src_bucket_name) result_marker = '' q = LifoQueue(maxsize=5000) for i in range(20): print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, aws_key, aws_secret_key, src_bucket_name, dst_bucket_name) t.daemon = True t.start() i = 0 while True: print 'Fetch next %s, backlog currently at %s, have done %s' % (max_keys, q.qsize(), i) try: keys = srcBucket.get_all_keys(max_keys=max_keys, marker=result_marker) if len(keys) == 0: break for k in keys: i += 1 q.put(k.key) print 'Added %s keys to queue' % len(keys) if len(keys) < max_keys: print 'All items now in queue' break result_marker = keys[max_keys - 1].key while q.qsize() > (q.maxsize - max_keys): time.sleep(1) # sleep if our queue is getting too big for the next set of keys except BaseException: logging.exception('error during fetch, quitting') break print 'Waiting for queue to be completed' q.join() print print 'Done' print
def copy_objects(from_conn, to_conn, from_bkt, to_bkt, thd_count,
                 overwrite=False, verbose=False):
    """Copy every key from from_bkt to to_bkt using thd_count ObjWorker
    threads, tracking progress in a per-bucket LevelDB database.

    from_conn / to_conn -- source / destination connections
    from_bkt / to_bkt   -- source / destination bucket objects
    thd_count           -- number of worker threads
    overwrite           -- passed through to ObjWorker
    verbose             -- passed through to ObjWorker
    """
    max_keys = 1000
    result_marker = ''
    # Use os.path.join instead of manual '/' concatenation.
    db_path = os.path.join(DB_PATH_ROOT, from_conn.name, from_bkt.name)
    if not os.path.exists(db_path):
        os.makedirs(db_path)
    db = leveldb.LevelDB(db_path)
    q = LifoQueue(maxsize=5000)
    # Dispatch tasks to ObjWorker from LifoQueue
    for i in xrange(thd_count):
        t = ObjWorker(i, q, from_conn, from_bkt, to_conn, to_bkt,
                      overwrite, verbose, db)
        t.daemon = True
        t.start()
    # Local scratch directory named after the bucket; presumably used by the
    # workers as temp space -- TODO confirm against ObjWorker.
    if not os.path.exists(from_bkt.name):
        os.mkdir(from_bkt.name)
    # Add tasks to LifoQueue, one listing page at a time.
    i = 0
    while True:
        try:
            keys = from_bkt.get_all_keys(max_keys=max_keys,
                                         marker=result_marker)
            if len(keys) == 0:
                break
            for i_key in keys:
                i += 1
                q.put(i_key.key)
            if len(keys) < max_keys:
                break
            result_marker = keys[max_keys - 1].key
            # Throttle while the backlog is close to the queue's capacity.
            while q.qsize() > (q.maxsize - max_keys):
                time.sleep(1)
        except BaseException:
            logging.exception('error during fetch, quit')
            break
    q.join()
    # NOTE(review): rmdir raises OSError if the scratch dir is non-empty;
    # that appears intentional (workers should have cleaned up) -- confirm.
    os.rmdir(from_bkt.name)
class ThreadPool(object):
    """Thread pool with per-period rate limiting.

    Tasks are (func, args, kwargs) tuples kept in a LIFO queue; getTask()
    throttles consumers so that at most max_tasks_per_period tasks are
    handed out per seconds_per_period window.
    """

    def __init__(self, threadNum, max_tasks_per_period=10, seconds_per_period=30):
        self.pool = []                    # worker thread objects
        self.threadNum = threadNum        # number of worker threads
        self.runningLock = Lock()         # guards self.running
        self.taskLock = Lock()            # lock intended for getTask
        self.running = 0                  # number of currently running workers
        # LIFO queue: after the first post's page is crawled, all of its
        # comment pages are pushed; LIFO ordering makes sure the first
        # post's comments are fetched and stored as soon as possible.
        self.taskQueue = LifoQueue()      # task queue
        # Maximum number of visits allowed within one period.
        self.max_tasks_per_period = max_tasks_per_period
        # Length of one period, in seconds.
        self.seconds_per_period = seconds_per_period
        # Number of pages already visited in the current period.
        self.currentPeriodVisits = 0
        # Start time of the current period (treated as one visit window).
        self.periodStart = time.time()    # initialized with the current time

    def startThreads(self):
        """Create a certain number of threads and started to run 
        All Workers share the same ThreadPool
        """
        # Begin the current crawling period.
        self.periodStart = time.time()
        for i in range(self.threadNum):
            self.pool.append(Worker(self, i))

    def stopThreads(self):
        # Stop and join every worker, then clear the pool in place.
        for thread in self.pool:
            thread.stop()
            thread.join()
        del self.pool[:]

    def putTask(self, func, *args, **kargs):
        # Tasks are (callable, positional args, keyword args) tuples.
        self.taskQueue.put((func, args, kargs))

    def getTask(self, *args, **kargs):
        # Rate control: check whether the number of pages visited in the
        # current period has reached the cap (minus a safety margin of 2).
        if self.currentPeriodVisits >= self.max_tasks_per_period - 2:
            timeNow = time.time()
            seconds = timeNow - self.periodStart
            if seconds < self.seconds_per_period:
                # The period has not elapsed yet, so sleep out the remainder.
                remain = self.seconds_per_period - seconds
                print "ThreadPool Waiting for " + str(remain) + " seconds."
                time.sleep(int(remain + 1))
            self.periodStart = time.time()  # reset the period start time
            self.currentPeriodVisits = 0
        try:
            # task = self.taskQueue.get(*args, **kargs)
            task = self.taskQueue.get_nowait()
        except Empty:
            # Sentinel tuple signals "no task available" to the caller.
            return (None, None, None)
        self.currentPeriodVisits += 1
        return task

    def taskJoin(self, *args, **kargs):
        """Queue.join: Blocks until all items in the queue have been gotten
        and processed.
        """
        self.taskQueue.join()

    def taskDone(self, *args, **kargs):
        self.taskQueue.task_done()

    def increaseRunsNum(self):
        self.runningLock.acquire()
        self.running += 1  # one more worker is running
        self.runningLock.release()

    def decreaseRunsNum(self):
        self.runningLock.acquire()
        self.running -= 1
        self.runningLock.release()

    def getTaskLeft(self):
        # Total outstanding work. The original note described this as
        # queued + result-queue + running tasks, but only the task queue
        # and running count are summed here (no result queue in this class).
        return self.taskQueue.qsize() + self.running
def copy_bucket(aws_key, aws_secret_key, args): max_keys = 1000 src = args.src_bucket dst = args.dest_bucket conn = S3Connection(aws_key, aws_secret_key) try: (src_bucket_name, src_path) = src.split('/', 1) except ValueError: src_bucket_name = src src_path = None try: (dst_bucket_name, dst_path) = dst.split('/', 1) except ValueError: dst_bucket_name = dst dst_path = None src_bucket = conn.get_bucket(src_bucket_name) if args.verbose: print print 'Start copy of %s to %s' % (src, dst) print result_marker = '' q = LifoQueue(maxsize=5000) for i in xrange(args.threads_no): if args.verbose: print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, aws_key, aws_secret_key, src_bucket_name, dst_bucket_name, src_path, dst_path, args) t.daemon = True t.start() i = 0 while True: if args.verbose: print 'Fetch next %s, backlog currently at %s, have done %s' % \ (max_keys, q.qsize(), i) try: keys = src_bucket.get_all_keys(max_keys=max_keys, marker=result_marker, prefix=src_path or '') if len(keys) == 0: break for k in keys: i += 1 q.put(k.key) if args.verbose: print 'Added %s keys to queue' % len(keys) if len(keys) < max_keys: if args.verbose: print 'All items now in queue' break result_marker = keys[max_keys - 1].key while q.qsize() > (q.maxsize - max_keys): time.sleep(1) # sleep if our queue is getting too big for the next set of keys except BaseException: logging.exception('error during fetch, quitting') break if args.verbose: print 'Waiting for queue to be completed' q.join() if args.verbose: print print 'Done' print
def downloader(queue, proxies): while True: url = queue.get() r = requests.get(url) if r.status_code == 200: parsed_html = BeautifulSoup(r.content) table = parsed_html.body.find( 'table', attrs={'class': 'table table-striped proxy-list'}) table_body = table.find('tbody') rows = table_body.find_all('tr') print url + ' was fetched\n' for row in rows: proxy_tmp = {} ele_td = row.find("td", class_="first nsb") proxy_tmp['http'] = 'http://' + ele_td.text + ':80' if test_proxy(proxy_tmp): proxies.insert(proxy_tmp) else: print proxy_tmp['http'] + ' does not work, ignore it' q.task_done() if __name__ == '__main__': for i in range(max_threads): worker = Thread(target=downloader, args=(q, proxies)) worker.setDaemon(True) worker.start() q.join() client.close()
class TileProvider( QObject ):
    # Seconds a render worker blocks on the request queue before re-checking
    # the _keepRendering shutdown flag.
    THREAD_HEARTBEAT = 0.2

    # Lightweight record handed out by getTiles().
    Tile = collections.namedtuple('Tile', 'id qimg rectF progress tiling')

    # Emitted with the affected scene rect whenever a tile's content changed.
    changed = pyqtSignal( QRectF )

    '''TileProvider __init__

    Keyword Arguments:
    cache_size                -- maximal number of encountered stacks
                                 to cache, i.e. slices if the imagesources
                                 draw from slicesources (default 10)
    request_queue_size        -- maximal number of request to queue up (default 100000)
    n_threads                 -- maximal number of request threads; this determines the
                                 maximal number of simultaneously running requests
                                 to the pixelpipeline (default: 2)
    layerIdChange_means_dirty -- layerId changes invalidate the cache; by default
                                 only stackId changes do that (default False)
    parent                    -- QObject
    '''

    def __init__( self, tiling, stackedImageSources, cache_size = 10,
                  request_queue_size = 100000, n_threads = 2,
                  layerIdChange_means_dirty=False, parent=None ):
        QObject.__init__( self, parent = parent )
        self.tiling = tiling
        self._sims = stackedImageSources
        self._cache_size = cache_size
        self._request_queue_size = request_queue_size
        self._n_threads = n_threads
        self._layerIdChange_means_dirty = layerIdChange_means_dirty
        self._current_stack_id = self._sims.stackId
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        # LIFO so the most recently requested (i.e. currently visible)
        # tiles are rendered first.
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)

        # Invalidate / refresh the cache whenever the layer stack changes.
        self._sims.layerDirty.connect(self._onLayerDirty)
        self._sims.visibleChanged.connect(self._onVisibleChanged)
        self._sims.opacityChanged.connect(self._onOpacityChanged)
        self._sims.sizeChanged.connect(self._onSizeChanged)
        self._sims.orderChanged.connect(self._onOrderChanged)
        self._sims.stackIdChanged.connect(self._onStackIdChanged)
        if self._layerIdChange_means_dirty:
            self._sims.layerIdChanged.connect(self._onLayerIdChanged)

        # Daemon worker threads process queued layer-refresh requests until
        # notifyThreadsToStop() clears this flag.
        self._keepRendering = True
        self._dirtyLayerThreads = [Thread(target=self._dirtyLayersWorker)
                                   for i in range(self._n_threads)]
        for thread in self._dirtyLayerThreads:
            thread.daemon = True
        [ thread.start() for thread in self._dirtyLayerThreads ]

    def getTiles( self, rectF ):
        '''Get tiles in rect and request a refresh.

        Returns tiles intersectinf with rectF immediatelly and requests
        a refresh of these tiles. Next time you call this function the
        tiles may be already (partially) updated. If you want to wait
        until the rendering is fully complete, call join().
        '''
        self.requestRefresh( rectF )
        tile_nos = self.tiling.intersectedF( rectF )
        stack_id = self._current_stack_id
        for tile_no in tile_nos:
            qimg, progress = self._cache.tile(stack_id, tile_no)
            t = TileProvider.Tile(tile_no,
                                  qimg,
                                  QRectF(self.tiling.imageRects[tile_no]),
                                  progress,
                                  self.tiling)
            yield t

    def requestRefresh( self, rectF ):
        '''Requests tiles to be refreshed.

        Returns immediatelly. Call join() to wait for the end of the
        rendering.
        '''
        tile_nos = self.tiling.intersectedF( rectF )
        for tile_no in tile_nos:
            stack_id = self._current_stack_id
            self._refreshTile( stack_id, tile_no )

    def join( self ):
        '''Wait until all refresh request are processed.

        Blocks until no refresh request pending anymore and all rendering
        finished.
        '''
        return self._dirtyLayerQueue.join()

    def notifyThreadsToStop( self ):
        '''Signals render threads to stop.

        Call this method at the end of the lifetime of a TileProvider
        instance. Otherwise the garbage collector will not clean up the
        instance (even if you call del).
        '''
        self._keepRendering = False

    def threadsAreNotifiedToStop( self ):
        '''Check if NotifyThreadsToStop() was called at least once.'''
        return not self._keepRendering

    def joinThreads( self, timeout=None ):
        '''Wait until all threads terminated.

        Without calling notifyThreadsToStop, threads will never
        terminate.

        Arguments:
        timeout -- timeout in seconds as a floating point number
        '''
        for thread in self._dirtyLayerThreads:
            thread.join( timeout )

    def aliveThreads( self ):
        '''Return a map of thread identifiers and their alive status.

        All threads are alive until notifyThreadsToStop() is called.
        After that, they start dying. Call joinThreads() to wait for the
        last thread to die.
        '''
        at = {}
        for thread in self._dirtyLayerThreads:
            if thread.ident:
                at[thread.ident] = thread.isAlive()
        return at

    def _dirtyLayersWorker( self ):
        # Render-thread main loop: take a layer-refresh request off the
        # queue, fetch the image, and update the cache.
        while self._keepRendering:
            try:
                ims, tile_nr, stack_id, image_req, timestamp, cache = \
                    self._dirtyLayerQueue.get(True, self.THREAD_HEARTBEAT)
            except (Empty, TypeError):
                #the TypeError occurs when the self._dirtyLayerQueue
                #is already None when the thread is being shut down
                #on program exit.
                #This avoids a lot of warnings.
                continue
            try:
                # Only apply the result if it is newer than the cache entry.
                if timestamp > cache.layerTimestamp( stack_id, ims, tile_nr ):
                    img = image_req.wait()
                    cache.updateTileIfNecessary( stack_id, ims, tile_nr, timestamp, img )
                    # Notify the view only if this request is still for the
                    # currently displayed stack and the current cache.
                    if stack_id == self._current_stack_id and cache is self._cache:
                        self.changed.emit(QRectF(self.tiling.imageRects[tile_nr]))
            except KeyError:
                pass
            finally:
                self._dirtyLayerQueue.task_done()

    def _refreshTile( self, stack_id, tile_no ):
        try:
            if self._cache.tileDirty( stack_id, tile_no ):
                self._cache.setTileDirty(stack_id, tile_no, False)
                img = self._renderTile( stack_id, tile_no )
                self._cache.setTile( stack_id, tile_no, img,
                                     self._sims.viewVisible(),
                                     self._sims.viewOccluded() )

                # refresh dirty layer tiles
                for ims in self._sims.viewImageSources():
                    if self._cache.layerDirty(stack_id, ims, tile_no) \
                       and not self._sims.isOccluded(ims) \
                       and self._sims.isVisible(ims):
                        req = (ims, tile_no, stack_id,
                               ims.request(self.tiling.imageRects[tile_no]),
                               time.time(), self._cache)
                        try:
                            self._dirtyLayerQueue.put_nowait( req )
                        except Full:
                            warnings.warn("Request queue full. Dropping tile refresh request. Increase queue size!")
        except KeyError:
            pass

    def _renderTile( self, stack_id, tile_nr ):
        # Composite all visible cached layer patches bottom-to-top onto a
        # white background image for this tile.
        qimg = QImage(self.tiling.imageRects[tile_nr].size(),
                      QImage.Format_ARGB32_Premultiplied)
        qimg.fill(Qt.white)
        p = QPainter(qimg)
        for i, v in enumerate(reversed(self._sims)):
            visible, layerOpacity, layerImageSource = v
            if not visible:
                continue
            patch = self._cache.layer(stack_id, layerImageSource, tile_nr )
            if patch is not None:
                p.setOpacity(layerOpacity)
                p.drawImage(0,0, patch)
        p.end()
        return qimg

    def _onLayerDirty(self, dirtyImgSrc, rect ):
        if dirtyImgSrc in self._sims.viewImageSources():
            visibleAndNotOccluded = self._sims.isVisible( dirtyImgSrc ) \
                                    and not self._sims.isOccluded( dirtyImgSrc )
            for tile_no in xrange(len(self.tiling)):
                # an invalid rect means everything is dirty
                if not rect.isValid() or self.tiling.tileRects[tile_no].intersected( rect ):
                    for ims in self._sims.viewImageSources():
                        self._cache.setLayerDirtyAll(ims, tile_no, True)
                    if visibleAndNotOccluded:
                        self._cache.setTileDirtyAll(tile_no, True)
            if visibleAndNotOccluded:
                self.changed.emit( QRectF(rect) )

    def _onStackIdChanged( self, oldId, newId ):
        # Reuse a cached stack if available, otherwise start caching it.
        if newId in self._cache:
            self._cache.touchStack( newId )
        else:
            self._cache.addStack( newId )
        self._current_stack_id = newId
        self.changed.emit(QRectF())

    def _onLayerIdChanged( self, ims, oldId, newId ):
        if self._layerIdChange_means_dirty:
            self._onLayerDirty( ims, QRect() )

    def _onVisibleChanged(self, ims, visible):
        # Visibility toggles require re-compositing every tile.
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if not self._sims.isOccluded( ims ):
            self.changed.emit(QRectF())

    def _onOpacityChanged(self, ims, opacity):
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if self._sims.isVisible( ims ) and not self._sims.isOccluded( ims ):
            self.changed.emit(QRectF())

    def _onSizeChanged(self):
        # A tiling-size change invalidates everything: rebuild the cache and
        # drop all pending refresh requests.
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)
        self.changed.emit(QRectF())

    def _onOrderChanged(self):
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        self.changed.emit(QRectF())
class SaveManager(QObject):
    """Background save queue: save requests are processed one at a time on a
    dedicated daemon thread; a LIFO queue plus a monotonically increasing
    count means only the newest pending request is actually saved."""

    # Qt signals used to drive the status widget / report failures.
    start_save = pyqtSignal()
    report_error = pyqtSignal(object)
    save_done = pyqtSignal()

    def __init__(self, parent):
        QObject.__init__(self, parent)
        self.count = 0        # monotonically increasing request id
        self.last_saved = -1  # id of the newest request already saved
        self.requests = LifoQueue()
        t = Thread(name='save-thread', target=self.run)
        t.daemon = True
        t.start()
        self.status_widget = w = SaveWidget(parent)
        # Queued connections: signals are emitted from the save thread but
        # must update the widget on the GUI thread.
        self.start_save.connect(w.start, type=Qt.QueuedConnection)
        self.save_done.connect(w.stop, type=Qt.QueuedConnection)

    def schedule(self, tdir, container):
        # Each request carries its sequence number so stale ones can be
        # discarded in process_save().
        self.count += 1
        self.requests.put((self.count, tdir, container))

    def run(self):
        # Save-thread main loop; a None item is the shutdown sentinel.
        while True:
            x = self.requests.get()
            if x is None:
                self.requests.task_done()
                self.__empty_queue()
                break
            try:
                count, tdir, container = x
                self.process_save(count, tdir, container)
            except:
                import traceback
                traceback.print_exc()
            finally:
                self.requests.task_done()

    def __empty_queue(self):
        ' Only to be used during shutdown '
        while True:
            try:
                self.requests.get_nowait()
            except Empty:
                break
            else:
                self.requests.task_done()

    def process_save(self, count, tdir, container):
        # LIFO delivery: if a newer request was already saved, this one is
        # stale -- just clean up its temp dir and skip the save.
        if count <= self.last_saved:
            shutil.rmtree(tdir, ignore_errors=True)
            return
        self.last_saved = count
        self.start_save.emit()
        try:
            self.do_save(tdir, container)
        except:
            import traceback
            self.report_error.emit(traceback.format_exc())
        self.save_done.emit()

    def do_save(self, tdir, container):
        # Always remove the temp dir, even if saving fails.
        try:
            save_container(container, container.path_to_ebook)
        finally:
            shutil.rmtree(tdir, ignore_errors=True)

    @property
    def has_tasks(self):
        return bool(self.requests.unfinished_tasks)

    def wait(self, timeout=30):
        # timeout=None blocks forever; otherwise returns False on timeout.
        if timeout is None:
            self.requests.join()
        else:
            try:
                join_with_timeout(self.requests, timeout)
            except RuntimeError:
                return False
        return True

    def shutdown(self):
        # Enqueue the sentinel; run() drains and exits when it sees it.
        self.requests.put(None)
class TileProvider( QObject ):
    # Seconds a render worker blocks on the request queue before re-checking
    # the _keepRendering shutdown flag.
    THREAD_HEARTBEAT = 0.2

    # Lightweight record handed out by getTiles().
    Tile = collections.namedtuple('Tile', 'id qimg rectF progress tiling')

    # Emitted with the affected scene rect whenever a tile's content changed.
    sceneRectChanged = pyqtSignal( QRectF )

    '''TileProvider __init__

    Keyword Arguments:
    cache_size                -- maximal number of encountered stacks
                                 to cache, i.e. slices if the imagesources
                                 draw from slicesources (default 10)
    request_queue_size        -- maximal number of request to queue up (default 100000)
    n_threads                 -- maximal number of request threads; this determines the
                                 maximal number of simultaneously running requests
                                 to the pixelpipeline (default: 2)
    layerIdChange_means_dirty -- layerId changes invalidate the cache; by default
                                 only stackId changes do that (default False)
    parent                    -- QObject
    '''

    @property
    def axesSwapped(self):
        # Whether tile images must be rotated/flipped before compositing
        # (see the transform built in _refreshTile).
        return self._axesSwapped

    @axesSwapped.setter
    def axesSwapped(self, value):
        self._axesSwapped = value

    def __init__( self, tiling, stackedImageSources, cache_size=100,
                  request_queue_size=100000, n_threads=2,
                  layerIdChange_means_dirty=False, parent=None ):
        QObject.__init__( self, parent = parent )
        self.tiling = tiling
        self.axesSwapped = False
        self._sims = stackedImageSources
        self._cache_size = cache_size
        self._request_queue_size = request_queue_size
        self._n_threads = n_threads
        self._layerIdChange_means_dirty = layerIdChange_means_dirty
        self._current_stack_id = self._sims.stackId
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        # LIFO for visible-tile refreshes (newest first), FIFO for
        # prefetches (oldest first) -- see _dirtyLayersWorker.
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)
        self._prefetchQueue = Queue(self._request_queue_size)

        # Invalidate / refresh the cache whenever the layer stack changes.
        self._sims.layerDirty.connect(self._onLayerDirty)
        self._sims.visibleChanged.connect(self._onVisibleChanged)
        self._sims.opacityChanged.connect(self._onOpacityChanged)
        self._sims.sizeChanged.connect(self._onSizeChanged)
        self._sims.orderChanged.connect(self._onOrderChanged)
        self._sims.stackIdChanged.connect(self._onStackIdChanged)
        if self._layerIdChange_means_dirty:
            self._sims.layerIdChanged.connect(self._onLayerIdChanged)

        # Daemon worker threads process queued requests until
        # notifyThreadsToStop() clears this flag.
        self._keepRendering = True
        self._dirtyLayerThreads = [Thread(target=self._dirtyLayersWorker)
                                   for i in range(self._n_threads)]
        for thread in self._dirtyLayerThreads:
            thread.daemon = True
        [ thread.start() for thread in self._dirtyLayerThreads ]

    def getTiles( self, rectF ):
        '''Get tiles in rect and request a refresh.

        Returns tiles intersecting with rectF immediately and requests
        a refresh of these tiles. Next time you call this function the
        tiles may be already (partially) updated. If you want to wait
        until the rendering is fully complete, call join().
        '''
        self.requestRefresh( rectF )
        tile_nos = self.tiling.intersected( rectF )
        stack_id = self._current_stack_id
        for tile_no in tile_nos:
            qimg, progress = self._cache.tile(stack_id, tile_no)
            yield TileProvider.Tile(
                tile_no,
                qimg,
                QRectF(self.tiling.imageRects[tile_no]),
                progress,
                self.tiling)

    def requestRefresh( self, rectF ):
        '''Requests tiles to be refreshed.

        Returns immediately. Call join() to wait for the end of the
        rendering.
        '''
        tile_nos = self.tiling.intersected( rectF )
        for tile_no in tile_nos:
            stack_id = self._current_stack_id
            self._refreshTile( stack_id, tile_no )

    def prefetch( self, rectF, through ):
        '''Request fetching of tiles in advance.

        Returns immediately. Prefetch will commence after all regular
        tiles are refreshed (see requestRefresh() and getTiles() ). The
        prefetch is reset when the 'through' value of the slicing
        changes. Several calls to prefetch are handeled in Fifo order.
        '''
        # Prefetching only makes sense if more than one stack fits in the
        # cache; the prefetched slice gets its own stack id.
        if self._cache_size > 1:
            stack_id = (self._current_stack_id[0], through)
            if stack_id not in self._cache:
                self._cache.addStack(stack_id)
                # Keep the currently displayed stack most-recently-used.
                self._cache.touchStack( self._current_stack_id )
            tile_nos = self.tiling.intersected( rectF )
            for tile_no in tile_nos:
                self._refreshTile( stack_id, tile_no, prefetch=True )

    def join( self ):
        '''Wait until all refresh request are processed.

        Blocks until no refresh request pending anymore and all rendering
        finished.
        '''
        return self._dirtyLayerQueue.join()

    def notifyThreadsToStop( self ):
        '''Signals render threads to stop.

        Call this method at the end of the lifetime of a TileProvider
        instance. Otherwise the garbage collector will not clean up the
        instance (even if you call del).
        '''
        self._keepRendering = False

    def threadsAreNotifiedToStop( self ):
        '''Check if NotifyThreadsToStop() was called at least once.'''
        return not self._keepRendering

    def joinThreads( self, timeout=None ):
        '''Wait until all threads terminated.

        Without calling notifyThreadsToStop, threads will never
        terminate.

        Arguments:
        timeout -- timeout in seconds as a floating point number
        '''
        for thread in self._dirtyLayerThreads:
            thread.join( timeout )

    def aliveThreads( self ):
        '''Return a map of thread identifiers and their alive status.

        All threads are alive until notifyThreadsToStop() is called.
        After that, they start dying. Call joinThreads() to wait for the
        last thread to die.
        '''
        at = {}
        for thread in self._dirtyLayerThreads:
            if thread.ident:
                at[thread.ident] = thread.isAlive()
        return at

    def _dirtyLayersWorker( self ):
        # Render-thread main loop. Regular (dirty-layer) requests take
        # priority over prefetch requests; only when both queues are empty
        # does the thread block briefly on the dirty queue.
        while self._keepRendering:
            # Save reference to the queue in case self._dirtyLayerQueue is
            # reassigned during this pass. See onSizeChanged()
            dirtyLayerQueue = self._dirtyLayerQueue
            prefetchQueue = self._prefetchQueue
            try:
                try:
                    result = dirtyLayerQueue.get_nowait()
                    queue = dirtyLayerQueue
                except Empty:
                    try:
                        result = prefetchQueue.get_nowait()
                        queue = prefetchQueue
                    except Empty:
                        try:
                            result = dirtyLayerQueue.get(True, self.THREAD_HEARTBEAT)
                            queue = dirtyLayerQueue
                        except Empty:
                            continue
            except TypeError:
                #the TypeError occurs when the queue
                #is already None when the thread is being shut down
                #on program exit.
                #This avoids a lot of warnings.
                continue

            ims, transform, tile_nr, stack_id, image_req, timestamp, cache = result
            try:
                try:
                    layerTimestamp = cache.layerTimestamp( stack_id, ims, tile_nr )
                except KeyError:
                    # The cache entry may have been evicted meanwhile.
                    pass
                else:
                    # Only apply the result if it is newer than the cache.
                    if timestamp > layerTimestamp:
                        img = image_req.wait()
                        img = img.transformed(transform)
                        try:
                            cache.updateTileIfNecessary( stack_id, ims, tile_nr, timestamp, img )
                        except KeyError:
                            pass
                        else:
                            # Notify the view only if this result is still
                            # for the currently displayed stack and cache.
                            if stack_id == self._current_stack_id and cache is self._cache:
                                self.sceneRectChanged.emit(QRectF(self.tiling.imageRects[tile_nr]))
            except:
                with volumina.printLock:
                    sys.excepthook( *sys.exc_info() )
                    sys.stderr.write("ERROR: volumina tiling layer rendering worker thread caught an unhandled exception. See above.")
            finally:
                queue.task_done()

    def _refreshTile( self, stack_id, tile_no, prefetch=False ):
        # Build the scene transform, honoring a possible axis swap.
        if not self.axesSwapped:
            transform = QTransform(0,1,0,1,0,0,1,1,1)
        else:
            transform = QTransform().rotate(90).scale(1,-1)
        transform *= self.tiling.data2scene
        try:
            if self._cache.tileDirty( stack_id, tile_no ):
                if not prefetch:
                    self._cache.setTileDirty(stack_id, tile_no, False)
                    img = self._renderTile( stack_id, tile_no )
                    self._cache.setTile(stack_id, tile_no, img,
                                        self._sims.viewVisible(),
                                        self._sims.viewOccluded())

                # refresh dirty layer tiles
                for ims in self._sims.viewImageSources():
                    if self._cache.layerDirty(stack_id, ims, tile_no) \
                       and not self._sims.isOccluded(ims) \
                       and self._sims.isVisible(ims):
                        rect = self.tiling.imageRects[tile_no]
                        dataRect = self.tiling.scene2data.mapRect(rect)
                        ims_req = ims.request(dataRect, stack_id[1])
                        if ims.direct:
                            # The ImageSource 'ims' is fast (it has the
                            # direct flag set to true) so we process
                            # the request synchronously here. This
                            # improves the responsiveness for layers
                            # that have the data readily available.
                            start = time.time()
                            img = ims_req.wait()
                            img = img.transformed(transform)
                            stop = time.time()

                            ims._layer.timePerTile(stop-start,
                                                   self.tiling.imageRects[tile_no])

                            self._cache.updateTileIfNecessary(
                                stack_id, ims, tile_no, time.time(), img )
                            img = self._renderTile( stack_id, tile_no )
                            self._cache.setTile(stack_id, tile_no, img,
                                                self._sims.viewVisible(),
                                                self._sims.viewOccluded() )
                        else:
                            req = (ims, transform, tile_no, stack_id,
                                   ims_req, time.time(), self._cache)
                            try:
                                if prefetch:
                                    self._prefetchQueue.put_nowait( req )
                                else:
                                    self._dirtyLayerQueue.put_nowait( req )
                            except Full:
                                msg = " ".join(("Request queue full.",
                                                "Dropping tile refresh request.",
                                                "Increase queue size!"))
                                warnings.warn(msg)
        except KeyError:
            pass

    def _renderTile( self, stack_id, tile_nr):
        # Composite all visible cached layer patches bottom-to-top. The
        # QImage (and its painter) are created lazily on the first patch;
        # returns None if no layer had a patch for this tile.
        qimg = None
        p = None
        for i, v in enumerate(reversed(self._sims)):
            visible, layerOpacity, layerImageSource = v
            if not visible:
                continue
            patch = self._cache.layer(stack_id, layerImageSource, tile_nr )
            if patch is not None:
                if qimg is None:
                    qimg = QImage(self.tiling.imageRects[tile_nr].size(),
                                  QImage.Format_ARGB32_Premultiplied)
                    qimg.fill(0xffffffff) # Use a hex constant instead.
                    p = QPainter(qimg)
                p.setOpacity(layerOpacity)
                p.drawImage(0,0, patch)
        if p is not None:
            p.end()
        return qimg

    def _onLayerDirty(self, dirtyImgSrc, dataRect ):
        sceneRect = self.tiling.data2scene.mapRect(dataRect)
        if dirtyImgSrc in self._sims.viewImageSources():
            visibleAndNotOccluded = self._sims.isVisible( dirtyImgSrc ) \
                                    and not self._sims.isOccluded( dirtyImgSrc )
            for tile_no in xrange(len(self.tiling)):
                # an invalid rect means everything is dirty
                if not sceneRect.isValid() \
                   or self.tiling.tileRects[tile_no].intersected( sceneRect ):
                    for ims in self._sims.viewImageSources():
                        self._cache.setLayerDirtyAll(ims, tile_no, True)
                    if visibleAndNotOccluded:
                        self._cache.setTileDirtyAll(tile_no, True)
            if visibleAndNotOccluded:
                self.sceneRectChanged.emit( QRectF(sceneRect) )

    def _onStackIdChanged( self, oldId, newId ):
        # Reuse a cached stack if available, otherwise start caching it.
        if newId in self._cache:
            self._cache.touchStack( newId )
        else:
            self._cache.addStack( newId )
        self._current_stack_id = newId
        # Drop pending prefetches; they targeted the old slicing position.
        self._prefetchQueue = Queue(self._request_queue_size)
        self.sceneRectChanged.emit(QRectF())

    def _onLayerIdChanged( self, ims, oldId, newId ):
        if self._layerIdChange_means_dirty:
            self._onLayerDirty( ims, QRect() )

    def _onVisibleChanged(self, ims, visible):
        # Visibility toggles require re-compositing every tile.
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if not self._sims.isOccluded( ims ):
            self.sceneRectChanged.emit(QRectF())

    def _onOpacityChanged(self, ims, opacity):
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if self._sims.isVisible( ims ) and not self._sims.isOccluded( ims ):
            self.sceneRectChanged.emit(QRectF())

    def _onSizeChanged(self):
        # A tiling-size change invalidates everything: rebuild the cache and
        # drop all pending refresh/prefetch requests.
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)
        self._prefetchQueue = Queue(self._request_queue_size)
        self.sceneRectChanged.emit(QRectF())

    def _onOrderChanged(self):
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        self.sceneRectChanged.emit(QRectF())
def copy_bucket(aws_key, aws_secret_key, args): max_keys = 1000 src = args.src_bucket dst = args.dest_bucket conn = S3Connection(aws_key, aws_secret_key) try: (src_bucket_name, src_path) = src.split('/', 1) except ValueError: src_bucket_name = src src_path = None try: (dst_bucket_name, dst_path) = dst.split('/', 1) except ValueError: dst_bucket_name = dst dst_path = None src_bucket = conn.get_bucket(src_bucket_name) if args.verbose: print print 'Start copy of %s to %s' % (src, dst) print result_marker = '' q = LifoQueue(maxsize=5000) for i in xrange(args.threads_no): if args.verbose: print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, aws_key, aws_secret_key, src_bucket_name, dst_bucket_name, src_path, dst_path, args) t.daemon = True t.start() i = 0 while True: if args.verbose: print 'Fetch next %s, backlog currently at %s, have done %s' % \ (max_keys, q.qsize(), i) try: keys = src_bucket.get_all_keys(max_keys=max_keys, marker=result_marker, prefix=src_path or '') if len(keys) == 0: break for k in keys: i += 1 q.put(k.key) if args.verbose: print 'Added %s keys to queue' % len(keys) if len(keys) < max_keys: if args.verbose: print 'All items now in queue' break result_marker = keys[max_keys - 1].key while q.qsize() > (q.maxsize - max_keys): time.sleep( 1 ) # sleep if our queue is getting too big for the next set of keys except BaseException: logging.exception('error during fetch, quitting') break if args.verbose: print 'Waiting for queue to be completed' q.join() if args.verbose: print print 'Done' print
def copy_bucket(aws_key, aws_secret_key, src, dst): max_keys = 1000 conn = S3Connection(aws_key, aws_secret_key) try: (src_bucket_name, src_path) = src.split('/', 1) except ValueError: src_bucket_name = src src_path = None try: (dst_bucket_name, dst_path) = dst.split('/', 1) except ValueError: dst_bucket_name = dst dst_path = None if dst_path is not None: raise ValueError("not currently implemented to set dest path; must use default, which will mirror the source") src_bucket = conn.get_bucket(src_bucket_name) print print 'Start copy of %s to %s' % (src, dst) print result_marker = '' q = LifoQueue(maxsize=5000) for i in range(20): print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, aws_key, aws_secret_key, src_bucket_name, dst_bucket_name, src_path, dst_path) t.daemon = True t.start() i = 0 while True: print 'm (%s): Fetch next %s, backlog currently at %s, have done %s' % (src_path, max_keys, q.qsize(), i) try: keys = src_bucket.get_all_keys(max_keys=max_keys, marker=result_marker, prefix=src_path or '') if len(keys) == 0: break for k in keys: i += 1 q.put(k.key) # print 'Added %s keys to queue' % len(keys) if len(keys) < max_keys: print 'All items now in queue' break result_marker = keys[max_keys - 1].key while q.qsize() > (q.maxsize - max_keys): time.sleep(1) # sleep if our queue is getting too big for the next set of keys except BaseException: logging.exception('error during fetch, quitting') break print 'm (%s): Waiting for queue to be completed' % (src_path) q.join() print print 'm (%s): Done' % (src_path) print
if args.debug: queueMsg("\"max\", \"file\", \"dir\", \"results\"") # lets just hang back and wait for the queues to empty print "If you need to pause this job, press Ctrl-C once" time.sleep(1) while not terminateThreads: if args.debug: queueMsg("\"%s\", \"%s\", \"%s\", \"%s\"\n"%(args.queueParams['max'], fileQueue.qsize(), dirQueue.qsize(), resultsQueue.qsize())) time.sleep(.1) if fileQueue.empty() and dirQueue.empty(): queueMsg("\"%s\", \"%s\", \"%s\", \"%s\"\n"%(args.queueParams['max'], fileQueue.qsize(), dirQueue.qsize(), resultsQueue.qsize())) print "waiting for directory queue to clear..." dirQueue.join() print "waiting for file queue to clear..." fileQueue.join() print "waiting for worker processes to complete..." terminateThreads = True print "waiting for results queue to clear..." resultsQueue.join() print "exporting statistics..." exportStats() print "closing files..." for file in fileHandles: fileHandles[file].close() print "cleaning up process files..." cleanup() exit(1) except KeyboardInterrupt:
class B2BucketThreadedLocal(B2Bucket): def __init__(self, *args): super(B2BucketThreaded, self).__init__( *args) num_threads=50 self.queue = LifoQueue(num_threads*2) self.file_locks = defaultdict(Lock) self.running = True self.threads = [] print "Thread ", for i in xrange(num_threads): t = threading.Thread(target=self._file_updater) t.start() self.threads.append(t) print ".", print self.pre_queue_lock = Lock() self.pre_queue_running = True self.pre_queue = LifoQueue(num_threads*2) self.pre_file_dict = {} self.pre_thread = threading.Thread(target=self._prepare_update) self.pre_thread.start() def _prepare_update(self): while self.pre_queue_running: try: filename, local_filename, operation = self.pre_queue.get(True,1) self.pre_file_dict[filename] = (time(), local_filename, operation) self.pre_queue.task_done() except Empty: for filename, (timestamp, local_filename, operation) in self.pre_file_dict.items(): if time()-timestamp > 15: self.queue.put((filename, local_filename, operation)) del self.pre_file_dict[filename] for filename, (timestamp, local_filename, operation) in self.pre_file_dict.items(): self.queue.put((filename, local_filename, operation)) del self.pre_file_dict[filename] def _file_updater(self): while self.running: try: filename, local_filename, operation = self.queue.get(True,1) except Empty: continue with self.file_locks[filename]: if operation == "deletion": super(B2BucketThreaded,self)._delete_file(filename) self.queue.task_done() elif operation == "upload": super(B2BucketThreaded,self)._put_file(filename, local_filename) self.queue.task_done() elif operation == "download": super(B2BucketThreaded,self)._get_file(filename, local_filename) self.queue.task_done() else: self.logger.error("Invalid operation %s on %s" % (operation, filename)) def __enter__(self): return self def __exit__(self, *args, **kwargs): self.logger.info("Waiting for all B2 requests to complete") self.logger.info("Pre-Queue contains %s elements", self.pre_queue.qsize()) 
self.pre_queue.join() self.logger.info("Joining pre queue thread") self.pre_queue_running = False self.pre_thread.join() self.logger.info("Queue contains %s elements", self.queue.qsize()) self.queue.join() self.logger.info("Joining threads") self.running = False for t in self.threads: t.join() def put_file(self, filename, local_filename): with self.pre_queue_lock: self.logger.info("Postponing upload of %s (%s)", filename, len(data)) self.pre_queue.put((filename, local_filename, "upload"), True) new_file = {} new_file['fileName'] = filename new_file['fileId'] = None new_file['uploadTimestamp'] = time() new_file['action'] = 'upload' new_file['contentLength'] = len(data) return new_file def delete_file(self, filename): with self.pre_queue_lock: self.logger.info("Postponing deletion of %s", filename) self.pre_queue.put((filename, None, "deletion"),True) def get_file(self, filename, local_filename): with self.pre_queue_lock: self.logger.info("Postponing download of %s", filename) self.pre_queue.put((filename, local_filename, "download"),True)
class B2BucketThreadedLocal(B2Bucket): def __init__(self, *args): super(B2BucketThreaded, self).__init__(*args) num_threads = 50 self.queue = LifoQueue(num_threads * 2) self.file_locks = defaultdict(Lock) self.running = True self.threads = [] print "Thread ", for i in xrange(num_threads): t = threading.Thread(target=self._file_updater) t.start() self.threads.append(t) print ".", print self.pre_queue_lock = Lock() self.pre_queue_running = True self.pre_queue = LifoQueue(num_threads * 2) self.pre_file_dict = {} self.pre_thread = threading.Thread(target=self._prepare_update) self.pre_thread.start() def _prepare_update(self): while self.pre_queue_running: try: filename, local_filename, operation = self.pre_queue.get( True, 1) self.pre_file_dict[filename] = (time(), local_filename, operation) self.pre_queue.task_done() except Empty: for filename, (timestamp, local_filename, operation) in self.pre_file_dict.items(): if time() - timestamp > 15: self.queue.put((filename, local_filename, operation)) del self.pre_file_dict[filename] for filename, (timestamp, local_filename, operation) in self.pre_file_dict.items(): self.queue.put((filename, local_filename, operation)) del self.pre_file_dict[filename] def _file_updater(self): while self.running: try: filename, local_filename, operation = self.queue.get(True, 1) except Empty: continue with self.file_locks[filename]: if operation == "deletion": super(B2BucketThreaded, self)._delete_file(filename) self.queue.task_done() elif operation == "upload": super(B2BucketThreaded, self)._put_file(filename, local_filename) self.queue.task_done() elif operation == "download": super(B2BucketThreaded, self)._get_file(filename, local_filename) self.queue.task_done() else: self.logger.error("Invalid operation %s on %s" % (operation, filename)) def __enter__(self): return self def __exit__(self, *args, **kwargs): self.logger.info("Waiting for all B2 requests to complete") self.logger.info("Pre-Queue contains %s elements", self.pre_queue.qsize()) 
self.pre_queue.join() self.logger.info("Joining pre queue thread") self.pre_queue_running = False self.pre_thread.join() self.logger.info("Queue contains %s elements", self.queue.qsize()) self.queue.join() self.logger.info("Joining threads") self.running = False for t in self.threads: t.join() def put_file(self, filename, local_filename): with self.pre_queue_lock: self.logger.info("Postponing upload of %s (%s)", filename, len(data)) self.pre_queue.put((filename, local_filename, "upload"), True) new_file = {} new_file['fileName'] = filename new_file['fileId'] = None new_file['uploadTimestamp'] = time() new_file['action'] = 'upload' new_file['contentLength'] = len(data) return new_file def delete_file(self, filename): with self.pre_queue_lock: self.logger.info("Postponing deletion of %s", filename) self.pre_queue.put((filename, None, "deletion"), True) def get_file(self, filename, local_filename): with self.pre_queue_lock: self.logger.info("Postponing download of %s", filename) self.pre_queue.put((filename, local_filename, "download"), True)
q.put('http://www.proxy4free.com/list/webproxy'+ str(i+1) +'.html') def downloader(queue,proxies): while True: url = queue.get() r = requests.get(url) if r.status_code == 200: parsed_html = BeautifulSoup(r.content) table = parsed_html.body.find('table',attrs={'class':'table table-striped proxy-list'}) table_body = table.find('tbody') rows = table_body.find_all('tr') print url + ' was fetched\n' for row in rows: proxy_tmp = {} ele_td = row.find("td",class_="first nsb") proxy_tmp['http'] = 'http://' + ele_td.text + ':80' if test_proxy(proxy_tmp): proxies.insert(proxy_tmp) else: print proxy_tmp['http'] + ' does not work, ignore it' q.task_done() if __name__ == '__main__': for i in range(max_threads): worker = Thread(target=downloader, args=(q,proxies)) worker.setDaemon(True) worker.start() q.join() client.close()
class TileProvider(QObject):
    # Seconds a render worker blocks on the dirty-layer queue before looping
    # and re-checking self._keepRendering.
    THREAD_HEARTBEAT = 0.2

    # A renderable tile as handed out by getTiles().
    Tile = collections.namedtuple('Tile', 'id qimg rectF progress tiling')

    # Emitted whenever a region of the scene needs repainting.
    sceneRectChanged = pyqtSignal(QRectF)

    '''TileProvider __init__

    Keyword Arguments:
    cache_size -- maximal number of encountered stacks
                  to cache, i.e. slices if the imagesources
                  draw from slicesources (default 10)
    request_queue_size -- maximal number of request to queue up (default 100000)
    n_threads -- maximal number of request threads; this determines the
                 maximal number of simultaneously running requests
                 to the pixelpipeline (default: 2)
    layerIdChange_means_dirty -- layerId changes invalidate the cache; by
                                 default only stackId changes do that
                                 (default False)
    parent -- QObject
    '''

    @property
    def axesSwapped(self):
        # Whether tile rendering applies a 90-degree rotate/flip transform
        # (see _refreshTile).
        return self._axesSwapped

    @axesSwapped.setter
    def axesSwapped(self, value):
        self._axesSwapped = value

    def __init__(self, tiling, stackedImageSources, cache_size=100,
                 request_queue_size=100000, n_threads=2,
                 layerIdChange_means_dirty=False, parent=None):
        QObject.__init__(self, parent=parent)
        self.tiling = tiling
        self.axesSwapped = False
        self._sims = stackedImageSources
        self._cache_size = cache_size
        self._request_queue_size = request_queue_size
        self._n_threads = n_threads
        self._layerIdChange_means_dirty = layerIdChange_means_dirty

        self._current_stack_id = self._sims.stackId
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        # LIFO so the most recently requested (i.e. currently visible) tiles
        # are rendered first; prefetch requests go to a separate FIFO queue
        # that workers only service when no refresh is pending.
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)
        self._prefetchQueue = Queue(self._request_queue_size)

        self._sims.layerDirty.connect(self._onLayerDirty)
        self._sims.visibleChanged.connect(self._onVisibleChanged)
        self._sims.opacityChanged.connect(self._onOpacityChanged)
        self._sims.sizeChanged.connect(self._onSizeChanged)
        self._sims.orderChanged.connect(self._onOrderChanged)
        self._sims.stackIdChanged.connect(self._onStackIdChanged)
        if self._layerIdChange_means_dirty:
            self._sims.layerIdChanged.connect(self._onLayerIdChanged)

        self._keepRendering = True

        # Daemon worker threads rendering dirty layer tiles in the background.
        self._dirtyLayerThreads = [Thread(target=self._dirtyLayersWorker)
                                   for i in range(self._n_threads)]
        for thread in self._dirtyLayerThreads:
            thread.daemon = True
        [thread.start() for thread in self._dirtyLayerThreads]

    def getTiles(self, rectF):
        '''Get tiles in rect and request a refresh.

        Returns tiles intersecting with rectF immediately and requests
        a refresh of these tiles. Next time you call this function the
        tiles may be already (partially) updated. If you want to wait
        until the rendering is fully complete, call join().
        '''
        self.requestRefresh(rectF)
        tile_nos = self.tiling.intersected(rectF)
        stack_id = self._current_stack_id
        for tile_no in tile_nos:
            qimg, progress = self._cache.tile(stack_id, tile_no)
            yield TileProvider.Tile(tile_no,
                                    qimg,
                                    QRectF(self.tiling.imageRects[tile_no]),
                                    progress,
                                    self.tiling)

    def requestRefresh(self, rectF):
        '''Requests tiles to be refreshed.

        Returns immediately. Call join() to wait for
        the end of the rendering.
        '''
        tile_nos = self.tiling.intersected(rectF)
        for tile_no in tile_nos:
            stack_id = self._current_stack_id
            self._refreshTile(stack_id, tile_no)

    def prefetch(self, rectF, through):
        '''Request fetching of tiles in advance.

        Returns immediately. Prefetch will commence after all regular
        tiles are refreshed (see requestRefresh() and getTiles() ).
        The prefetch is reset when the 'through' value of the slicing
        changes. Several calls to prefetch are handled in Fifo order.
        '''
        if self._cache_size > 1:
            # NOTE(review): enumerate(through) creates a fresh iterator object
            # each call, so this stack_id never compares equal to an existing
            # cache key -- looks like tuple(through) was intended. TODO confirm.
            stack_id = (self._current_stack_id[0], enumerate(through))
            if stack_id not in self._cache:
                self._cache.addStack(stack_id)
            self._cache.touchStack(self._current_stack_id)
            tile_nos = self.tiling.intersected(rectF)
            for tile_no in tile_nos:
                self._refreshTile(stack_id, tile_no, prefetch=True)

    def join(self):
        '''Wait until all refresh request are processed.

        Blocks until no refresh request pending anymore and all
        rendering finished.
        '''
        return self._dirtyLayerQueue.join()

    def notifyThreadsToStop(self):
        '''Signals render threads to stop.

        Call this method at the end of the lifetime of a TileProvider
        instance. Otherwise the garbage collector will not clean up the
        instance (even if you call del).
        '''
        self._keepRendering = False

    def threadsAreNotifiedToStop(self):
        '''Check if NotifyThreadsToStop() was called at least once.'''
        return not self._keepRendering

    def joinThreads(self, timeout=None):
        '''Wait until all threads terminated.

        Without calling notifyThreadsToStop, threads will never
        terminate.

        Arguments:
        timeout -- timeout in seconds as a floating point number
        '''
        for thread in self._dirtyLayerThreads:
            thread.join(timeout)

    def aliveThreads(self):
        '''Return a map of thread identifiers and their alive status.

        All threads are alive until notifyThreadsToStop() is
        called. After that, they start dying. Call joinThreads() to wait
        for the last thread to die.
        '''
        at = {}
        for thread in self._dirtyLayerThreads:
            if thread.ident:
                at[thread.ident] = thread.isAlive()
        return at

    def _dirtyLayersWorker(self):
        # Render worker loop: prefer refresh requests over prefetches, then
        # block briefly on the refresh queue before re-checking the stop flag.
        while self._keepRendering:
            # Save reference to the queue in case self._dirtyLayerQueue
            # reassigned during this pass. See onSizeChanged()
            dirtyLayerQueue = self._dirtyLayerQueue
            prefetchQueue = self._prefetchQueue
            try:
                try:
                    result = dirtyLayerQueue.get_nowait()
                    queue = dirtyLayerQueue
                except Empty:
                    try:
                        result = prefetchQueue.get_nowait()
                        queue = prefetchQueue
                    except Empty:
                        try:
                            result = dirtyLayerQueue.get(
                                True, self.THREAD_HEARTBEAT)
                            queue = dirtyLayerQueue
                        except Empty:
                            continue
            except TypeError:
                # the TypeError occurs when the queue
                # is already None when the thread is being shut down
                # on program exit.
                # This avoids a lot of warnings.
                continue

            ims, transform, tile_nr, stack_id, image_req, timestamp, cache = result
            try:
                try:
                    layerTimestamp = cache.layerTimestamp(
                        stack_id, ims, tile_nr)
                except KeyError:
                    # The stack was removed from the cache in the meantime.
                    pass
                else:
                    if timestamp > layerTimestamp:
                        img = image_req.wait()
                        img = img.transformed(transform)
                        try:
                            cache.updateTileIfNecessary(
                                stack_id, ims, tile_nr, timestamp, img)
                        except KeyError:
                            pass
                        else:
                            # Only repaint if this result is still for the
                            # stack and cache currently on screen.
                            if stack_id == self._current_stack_id and cache is self._cache:
                                self.sceneRectChanged.emit(
                                    QRectF(self.tiling.imageRects[tile_nr]))
            except:
                with volumina.printLock:
                    sys.excepthook(*sys.exc_info())
                sys.stderr.write(
                    "ERROR: volumina tiling layer rendering worker thread caught an unhandled exception. See above."
                )
            finally:
                queue.task_done()

    def _refreshTile(self, stack_id, tile_no, prefetch=False):
        # Re-render a tile: recomposite immediately from cached layer patches,
        # then (re)request any dirty layer data either synchronously (direct
        # image sources) or via the worker queues.
        if not self.axesSwapped:
            transform = QTransform(0, 1, 0, 1, 0, 0, 1, 1, 1)
        else:
            transform = QTransform().rotate(90).scale(1, -1)
        transform *= self.tiling.data2scene

        try:
            if self._cache.tileDirty(stack_id, tile_no):
                if not prefetch:
                    self._cache.setTileDirty(stack_id, tile_no, False)
                    img = self._renderTile(stack_id, tile_no)
                    self._cache.setTile(stack_id, tile_no, img,
                                        self._sims.viewVisible(),
                                        self._sims.viewOccluded())

                # refresh dirty layer tiles
                for ims in self._sims.viewImageSources():
                    if self._cache.layerDirty(stack_id, ims, tile_no) \
                            and not self._sims.isOccluded(ims) \
                            and self._sims.isVisible(ims):
                        rect = self.tiling.imageRects[tile_no]
                        dataRect = self.tiling.scene2data.mapRect(rect)
                        ims_req = ims.request(dataRect, stack_id[1])
                        if ims.direct and not prefetch:
                            # The ImageSource 'ims' is fast (it has the
                            # direct flag set to true) so we process
                            # the request synchronously here. This
                            # improves the responsiveness for layers
                            # that have the data readily available.
                            start = time.time()
                            img = ims_req.wait()
                            img = img.transformed(transform)
                            stop = time.time()

                            ims._layer.timePerTile(
                                stop - start, self.tiling.imageRects[tile_no])

                            self._cache.updateTileIfNecessary(
                                stack_id, ims, tile_no, time.time(), img)
                            img = self._renderTile(stack_id, tile_no)
                            self._cache.setTile(stack_id, tile_no, img,
                                                self._sims.viewVisible(),
                                                self._sims.viewOccluded())
                        else:
                            req = (ims, transform, tile_no, stack_id, ims_req,
                                   time.time(), self._cache)
                            try:
                                if prefetch:
                                    self._prefetchQueue.put_nowait(req)
                                else:
                                    self._dirtyLayerQueue.put_nowait(req)
                            except Full:
                                msg = " ".join(
                                    ("Request queue full.",
                                     "Dropping tile refresh request.",
                                     "Increase queue size!"))
                                warnings.warn(msg)
        except KeyError:
            # Stack vanished from the cache while we worked; nothing to do.
            pass

    def _renderTile(self, stack_id, tile_nr):
        # Composite all visible cached layer patches bottom-up into one QImage.
        # Returns None when no layer has a patch for this tile yet.
        qimg = None
        p = None
        for i, v in enumerate(reversed(self._sims)):
            visible, layerOpacity, layerImageSource = v
            if not visible:
                continue
            patch = self._cache.layer(stack_id, layerImageSource, tile_nr)
            if patch is not None:
                if qimg is None:
                    qimg = QImage(self.tiling.imageRects[tile_nr].size(),
                                  QImage.Format_ARGB32_Premultiplied)
                    qimg.fill(0xffffffff)  # Use a hex constant instead.
                p = QPainter(qimg)
                p.setOpacity(layerOpacity)
                p.drawImage(0, 0, patch)
        if p is not None:
            p.end()
        return qimg

    def _onLayerDirty(self, dirtyImgSrc, dataRect):
        # A layer reported dirty data: invalidate affected cache entries and,
        # if the layer is shown, ask the scene to repaint the region.
        sceneRect = self.tiling.data2scene.mapRect(dataRect)
        if dirtyImgSrc in self._sims.viewImageSources():
            visibleAndNotOccluded = self._sims.isVisible( dirtyImgSrc ) \
                and not self._sims.isOccluded( dirtyImgSrc )
            for tile_no in xrange(len(self.tiling)):
                # an invalid rect means everything is dirty
                if not sceneRect.isValid() \
                        or self.tiling.tileRects[tile_no].intersected( sceneRect ):
                    for ims in self._sims.viewImageSources():
                        self._cache.setLayerDirtyAll(ims, tile_no, True)
                    if visibleAndNotOccluded:
                        self._cache.setTileDirtyAll(tile_no, True)
            if visibleAndNotOccluded:
                self.sceneRectChanged.emit(QRectF(sceneRect))

    def _onStackIdChanged(self, oldId, newId):
        # Switch the active cache stack; pending prefetches for the old stack
        # are discarded by replacing the prefetch queue.
        if newId in self._cache:
            self._cache.touchStack(newId)
        else:
            self._cache.addStack(newId)
        self._current_stack_id = newId
        self._prefetchQueue = Queue(self._request_queue_size)
        self.sceneRectChanged.emit(QRectF())

    def _onLayerIdChanged(self, ims, oldId, newId):
        # Only dirties when configured (see layerIdChange_means_dirty).
        if self._layerIdChange_means_dirty:
            self._onLayerDirty(ims, QRect())

    def _onVisibleChanged(self, ims, visible):
        # Visibility toggles force re-composition of every tile.
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if not self._sims.isOccluded(ims):
            self.sceneRectChanged.emit(QRectF())

    def _onOpacityChanged(self, ims, opacity):
        # Opacity changes force re-composition of every tile.
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        if self._sims.isVisible(ims) and not self._sims.isOccluded(ims):
            self.sceneRectChanged.emit(QRectF())

    def _onSizeChanged(self):
        # The stack size changed: drop the cache and both queues and restart.
        self._cache = _TilesCache(self._current_stack_id, self._sims,
                                  maxstacks=self._cache_size)
        self._dirtyLayerQueue = LifoQueue(self._request_queue_size)
        self._prefetchQueue = Queue(self._request_queue_size)
        self.sceneRectChanged.emit(QRectF())

    def _onOrderChanged(self):
        # Layer order affects compositing; every tile must be redrawn.
        for tile_no in xrange(len(self.tiling)):
            self._cache.setTileDirtyAll(tile_no, True)
        self.sceneRectChanged.emit(QRectF())
def copy_bucket(aws_key, aws_secret_key, src, dst, ignore_etag=False): max_keys = 1000 conn = S3Connection(aws_key, aws_secret_key) try: (src_bucket_name, src_path) = src.split('/', 1) except ValueError: src_bucket_name = src src_path = None try: (dst_bucket_name, dst_path) = dst.split('/', 1) except ValueError: dst_bucket_name = dst dst_path = None if dst_path is not None: raise ValueError( "not currently implemented to set dest path; must use default, which will mirror the source" ) srcBucket = conn.get_bucket(src_bucket_name) print print 'Start copy of %s to %s' % (src, dst) print result_marker = '' q = LifoQueue(maxsize=5000) for i in range(20): print 'Adding worker thread %s for queue processing' % i t = Worker(q, i, aws_key, aws_secret_key, src_bucket_name, dst_bucket_name, src_path, dst_path, ignore_etag) t.daemon = True t.start() i = 0 while True: print 'Fetch next %s, backlog currently at %s, have done %s' % \ (max_keys, q.qsize(), i) try: keys = srcBucket.get_all_keys(max_keys=max_keys, marker=result_marker, prefix=src_path or '') if len(keys) == 0: break for k in keys: i += 1 q.put(k.key) print 'Added %s keys to queue' % len(keys) if len(keys) < max_keys: print 'All items now in queue' break result_marker = keys[max_keys - 1].key while q.qsize() > (q.maxsize - max_keys): time.sleep( 1 ) # sleep if our queue is getting too big for the next set of keys except BaseException: logging.exception('error during fetch, quitting') break print 'Waiting for queue to be completed' q.join() print print 'Done' print
class SaveManager(QObject):
    """Runs ebook saves on a background thread, serialized through a queue,
    and optionally notifies a running calibre instance after each save."""

    start_save = pyqtSignal()
    report_error = pyqtSignal(object)
    save_done = pyqtSignal()

    def __init__(self, parent, notify=None):
        QObject.__init__(self, parent)
        self.count = 0        # id of the most recently scheduled request
        self.last_saved = -1  # id of the newest request actually saved
        self.requests = LifoQueue()
        self.notify_requests = LifoQueue()
        self.notify_data = notify
        t = Thread(name='save-thread', target=self.run)
        t.daemon = True
        t.start()
        t = Thread(name='notify-thread', target=self.notify_calibre)
        t.daemon = True
        t.start()
        self.status_widget = w = SaveWidget(parent)
        self.start_save.connect(w.start, type=Qt.QueuedConnection)
        self.save_done.connect(w.stop, type=Qt.QueuedConnection)

    def schedule(self, tdir, container):
        """Queue a save of `container`; `tdir` is removed once processed."""
        self.count += 1
        self.requests.put((self.count, tdir, container))

    def run(self):
        """Save-thread loop: process requests until the None sentinel."""
        while True:
            x = self.requests.get()
            if x is None:
                self.requests.task_done()
                self.__empty_queue()
                break
            try:
                count, tdir, container = x
                self.process_save(count, tdir, container)
            except Exception:
                # Narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are not silently swallowed.
                import traceback
                traceback.print_exc()
            finally:
                self.requests.task_done()

    def notify_calibre(self):
        """Notify-thread loop: ping calibre for each queued notification;
        a falsy item (the None sentinel) terminates the loop."""
        while True:
            if not self.notify_requests.get():
                break
            send_message(self.notify_data)

    def clear_notify_data(self):
        self.notify_data = None

    def __empty_queue(self):
        ' Only to be used during shutdown '
        while True:
            try:
                self.requests.get_nowait()
            except Empty:
                break
            else:
                self.requests.task_done()

    def process_save(self, count, tdir, container):
        """Save `container` unless a newer request has already been saved."""
        if count <= self.last_saved:
            # A newer snapshot was already written; just discard this one.
            shutil.rmtree(tdir, ignore_errors=True)
            return
        self.last_saved = count
        self.start_save.emit()
        try:
            self.do_save(tdir, container)
        except Exception:
            # Narrowed from a bare `except:`; errors are surfaced to the GUI.
            import traceback
            self.report_error.emit(traceback.format_exc())
        self.save_done.emit()
        if self.notify_data:
            self.notify_requests.put(True)

    def do_save(self, tdir, container):
        """Persist the container to its ebook path; always clean up tdir."""
        try:
            save_container(container, container.path_to_ebook)
        finally:
            shutil.rmtree(tdir, ignore_errors=True)

    @property
    def has_tasks(self):
        return bool(self.requests.unfinished_tasks)

    def wait(self, timeout=30):
        """Wait for pending saves; return False if the timeout expires."""
        if timeout is None:
            self.requests.join()
        else:
            try:
                join_with_timeout(self.requests, timeout)
            except RuntimeError:
                return False
        return True

    def shutdown(self):
        """Stop both worker threads (None is the sentinel)."""
        self.requests.put(None)
        self.notify_requests.put(None)
class SaveManager(QObject):
    """Runs ebook saves on a background thread, serialized through a queue;
    commits via a temp file plus atomic rename so the target is never left
    half-written."""

    start_save = pyqtSignal()
    report_error = pyqtSignal(object)
    save_done = pyqtSignal()

    def __init__(self, parent):
        QObject.__init__(self, parent)
        self.count = 0        # id of the most recently scheduled request
        self.last_saved = -1  # id of the newest request actually saved
        self.requests = LifoQueue()
        t = Thread(name='save-thread', target=self.run)
        t.daemon = True
        t.start()
        self.status_widget = w = SaveWidget(parent)
        self.start_save.connect(w.start, type=Qt.QueuedConnection)
        self.save_done.connect(w.stop, type=Qt.QueuedConnection)

    def schedule(self, tdir, container):
        """Queue a save of `container`; `tdir` is removed once processed."""
        self.count += 1
        self.requests.put((self.count, tdir, container))

    def run(self):
        """Save-thread loop: process requests until the None sentinel."""
        while True:
            x = self.requests.get()
            if x is None:
                self.requests.task_done()
                self.__empty_queue()
                break
            try:
                count, tdir, container = x
                self.process_save(count, tdir, container)
            except Exception:
                # Narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are not silently swallowed.
                import traceback
                traceback.print_exc()
            finally:
                self.requests.task_done()

    def __empty_queue(self):
        ' Only to be used during shutdown '
        while True:
            try:
                self.requests.get_nowait()
            except Empty:
                break
            else:
                self.requests.task_done()

    def process_save(self, count, tdir, container):
        """Save `container` unless a newer request has already been saved."""
        if count <= self.last_saved:
            # A newer snapshot was already written; just discard this one.
            shutil.rmtree(tdir, ignore_errors=True)
            return
        self.last_saved = count
        self.start_save.emit()
        try:
            self.do_save(tdir, container)
        except Exception:
            # Narrowed from a bare `except:`; errors are surfaced to the GUI.
            import traceback
            self.report_error.emit(traceback.format_exc())
        self.save_done.emit()

    def do_save(self, tdir, container):
        """Commit the container next to the target, then atomically rename it
        over the ebook path; clean up the temp file and tdir either way."""
        temp = None
        try:
            path = container.path_to_ebook
            temp = PersistentTemporaryFile(prefix=('_' if iswindows else '.'),
                                           suffix=os.path.splitext(path)[1],
                                           dir=os.path.dirname(path))
            temp.close()
            temp = temp.name
            container.commit(temp)
            atomic_rename(temp, path)
        finally:
            if temp and os.path.exists(temp):
                os.remove(temp)
            shutil.rmtree(tdir, ignore_errors=True)

    @property
    def has_tasks(self):
        return bool(self.requests.unfinished_tasks)

    def wait(self, timeout=30):
        """Wait for pending saves; return False if the timeout expires."""
        if timeout is None:
            self.requests.join()
        else:
            try:
                join_with_timeout(self.requests, timeout)
            except RuntimeError:
                return False
        return True

    def shutdown(self):
        """Stop the save thread (None is the sentinel)."""
        self.requests.put(None)