def test_queue_objects(self): queue = RedisQueue("test-queue", maxsize=100, host=REDIS_HOST) # put int queue.put(1) v = queue.get_nowait() self.assertEqual(v, 1) self.assertIsInstance(v, int) # put str queue.put("a") v = queue.get_nowait() self.assertEqual(v, "a") self.assertIsInstance(v, str) # put float queue.put(1.) v = queue.get_nowait() self.assertEqual(v, 1.) self.assertIsInstance(v, float) # put list queue.put([1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) v = queue.get_nowait() self.assertEqual(v, [1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) self.assertIsInstance(v, list) # put dict queue.put({"x": "y"}) v = queue.get_nowait() self.assertEqual(v, {"x": "y"}) self.assertIsInstance(v, dict)
def test_queue_objects(self): queue = RedisQueue("test-queue", maxsize=100, host=REDIS_HOST) # put int queue.put(1) v = queue.get_nowait() assert v == 1 assert isinstance(v, int) # put str queue.put("a") v = queue.get_nowait() assert v == "a" assert isinstance(v, six.string_types) # put float queue.put(1.) v = queue.get_nowait() assert v == 1. assert isinstance(v, float) # put list queue.put([1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) v = queue.get_nowait() assert v == [1, 3, 4, 5, "a", "b", "c", 1., 2., 3.] assert isinstance(v, list) # put dict queue.put({"x": "y"}) v = queue.get_nowait() assert v == {"x": "y"} assert isinstance(v, dict)
def test_queue_objects(self): queue = RedisQueue("test-queue", maxsize=100) # put int queue.put(1) v = queue.get_nowait() self.assertEqual(v, 1) self.assertIsInstance(v, int) # put str queue.put("a") v = queue.get_nowait() self.assertEqual(v, "a") self.assertIsInstance(v, str) # put float queue.put(1.) v = queue.get_nowait() self.assertEqual(v, 1.) self.assertIsInstance(v, float) # put list queue.put([1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) v = queue.get_nowait() self.assertEqual(v, [1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) self.assertIsInstance(v, list) # put dict queue.put({"x": "y"}) v = queue.get_nowait() self.assertEqual(v, {"x": "y"}) self.assertIsInstance(v, dict)
def run(self):
    try:
        try:
            self._real_run()
        except:
            logging.error('Worker %s (%s) encountered an error processing',
                          self.name, self.description, exc_info=True)
        finally:
            for (queue, _) in self.input_queues:
                queue.mark_orphaned()
            for (queue, eof) in self.output_queues:
                try:
                    queue.put(eof)
                except WorkerQueueOrphanedError:
                    logging.debug('%s: Sending queue EOF failed, queue '
                                  'already orphaned', self.name)
                except:
                    logging.warning('%s: Error sending queue EOF', self.name,
                                    exc_info=True)
    except:
        self.exc_info = sys.exc_info()
        if not isinstance(self.exc_info[1], self.ignored_exception_types):
            log_exception(self.name, self.exc_info,
                          'Unexpected error in {0!s}'.format(self.description))
        else:
            logging.debug('%s: Terminated by an exception', self.name,
                          exc_info=True)
def _queue_record(self, queue, record):
    if not record['gt_boxes']:
        tf.logging.debug(
            'Dropping record {} without gt_boxes.'.format(record))
        return

    # If asking for a limited number per class, only yield if the current
    # example adds at least 1 new class that hasn't been maxed out. For
    # example, if "Persons" has been maxed out but "Bus" has not, a new
    # image containing only instances of "Person" will not be yielded,
    # while an image containing both "Person" and "Bus" instances will.
    if self._class_examples:
        labels_in_image = set([
            self.classes[bbox['label']] for bbox in record['gt_boxes']
        ])
        not_maxed_out = labels_in_image - self._maxed_out_classes

        if not not_maxed_out:
            tf.logging.debug(
                'Dropping record {} with maxed-out labels: {}'.format(
                    record['filename'], labels_in_image))
            return

        tf.logging.debug(
            'Queuing record {} with labels: {}'.format(
                record['filename'], labels_in_image))

    self._will_add_record(record)
    queue.put(record)
def test_datawriter_run_mock_scene(self, Publish, acquire_lock, release_locks,
                                   create_fnames, get_format_settings,
                                   create_message):
    import six.moves.queue as queue
    import time
    from trollflow_sat.tests.utils import (METADATA_FILE, MockScene,
                                           PRODUCT_LIST)
    create_fnames.return_value = (['overview.png'], 'overview')
    get_format_settings.return_value = [{'fill_value': 0, 'writer': 'foo'}]
    create_message.return_value = 'msg'
    self.writer.use_lock = True
    self.writer.prev_lock = 'foo'
    queue = queue.Queue()
    self.writer.writer.queue = queue
    scene = MockScene(attrs=METADATA_FILE)
    scene.attrs['area_id'] = 'area1'
    scene.load(['overview'])
    prod_list = PRODUCT_LIST['product_list']['area1']['products']
    # Add a missing product
    prod_list['missing'] = {}
    meta = {'product_config': PRODUCT_LIST, 'products': prod_list}
    queue.put({'scene': scene, 'extra_metadata': meta})
    time.sleep(1)
    self.assertTrue(create_fnames.called)
    self.assertTrue(get_format_settings.called)
    # acquire_lock.assert_called_with(call(self.writer.prev_lock))
    self.assertTrue(acquire_lock.called)
    self.assertTrue(release_locks.called)
def test_queue_context_after_close(queue_in_process):
    "Test that the queue correctly fails after closing it"
    with queue_in_process[0] as queue:
        pass
    queue.put("not in the context")
def test_queue_objects(self): queue = RedisQueue("test-queue", maxsize=100, host=REDIS_HOST) # put int queue.put(1) v = queue.get_nowait() assert v == 1 assert isinstance(v, int) # put str queue.put("a") v = queue.get_nowait() assert v == "a" assert isinstance(v, str) # put float queue.put(1.) v = queue.get_nowait() assert v == 1. assert isinstance(v, float) # put list queue.put([1, 3, 4, 5, "a", "b", "c", 1., 2., 3.]) v = queue.get_nowait() assert v == [1, 3, 4, 5, "a", "b", "c", 1., 2., 3.] assert isinstance(v, list) # put dict queue.put({"x": "y"}) v = queue.get_nowait() assert v == {"x": "y"} assert isinstance(v, dict)
def _ret_via_queue(func, queue):
    try:
        queue.put({'return': func()})
    except Exception:
        LOGGER.debug('Error while running thread %s',
                     threading.current_thread().name, exc_info=True)
        queue.put({'exception': sys.exc_info()})
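A minimal consumer sketch for _ret_via_queue above. The thread wiring and the "results" queue are illustrative assumptions; only the {'return': ...} / {'exception': ...} payload convention comes from the snippet itself.

# Hedged usage sketch: run a callable on a worker thread and recover either
# its return value or its exception via the payload convention shown above.
import queue as queue_module
import threading

results = queue_module.Queue()
worker = threading.Thread(target=_ret_via_queue, args=(lambda: 21 * 2, results))
worker.start()
worker.join()

outcome = results.get()
if 'exception' in outcome:
    # Re-raise the exception captured by sys.exc_info() in the worker.
    _, exc_value, _ = outcome['exception']
    raise exc_value
print(outcome['return'])  # -> 42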
def advance_step(f, *args, **kwargs):
    try:
        r = f(*args, **kwargs)
        queue.put(None)
        return r
    except Exception as e:
        queue.put(e)
        raise

finish_subprocess()
def _reader_loop(self, socket):
    """ Thread function that reads the zmq socket and puts the data
    into the queue """
    queue = self.reader_queue
    while True:
        try:
            data = socket.recv_json()
            __tag__ = data.get('tag')
            if __tag__ == '__quit__':
                # means to shutdown the thread
                # Close the socket just so the confirmation message
                # goes after the socket is closed. The 'with'
                # statement of _reader would close it anyways.
                socket.close()
                # Put None in the queue to signal clients that are
                # waiting for data
                queue.put(None)
                confirm_to = data.get('confirm_to', None)
                if confirm_to is not None:
                    # Confirm that the socket was closed
                    with Sender(confirm_to) as sender:
                        confirm_msg = data.get('confirm_msg', None)
                        sender.put(confirm_msg)
                self.namebroker_client.unregister(self.name)
                return
            if __tag__ == '__ping__':
                # answer special message without going to the receive,
                # since the actor may be doing something long lasting
                # and not reading the queue
                with Sender(data['reply_to']) as sender:
                    sender.put({'tag': '__pong__'})
                # avoid inserting this message in the queue
                continue
            if __tag__ == '__address__':
                # Fill the port info for my address
                with Sender(data['reply_to']) as sender:
                    sender.put({'tag': 'reply',
                                'address': self.address(),
                                'pid': os.getpid()})
                continue
            if __tag__ == '__low_level_ping__':
                # answer a ping from a straight zmq socket
                sender = data['reply_to']
                with zmq_socket(zmq.PUSH) as s:
                    s.connect(sender)
                    s.send_json({'tag': '__pong__'})
                continue
        except Exception:
            exc = traceback.format_exc()
            logger.debug('Reader thread for {} got an exception:'.format(
                self.path))
            logger.debug(exc)
            return
        queue.put(data)
def InvokeChromiumGenerateSymbols(args, lib_paths):
    """Invokes Chromium's script
    components/crash/content/tools/generate_breakpad_symbols.py
    for each lib of lib_paths."""
    queue = six.moves.queue.Queue()
    print_lock = threading.Lock()
    at_least_one_failed = multiprocessing.Value('b', False)

    chromium_script = os.path.join(
        args.src_root,
        'components/crash/content/tools/generate_breakpad_symbols.py')

    def _Worker():
        while True:
            lib_path = queue.get()
            try:
                # Invoke the original Chromium script
                args_to_pass = ['python', chromium_script,
                                '--build-dir=' + args.build_dir,
                                '--symbols-dir=' + args.symbols_dir,
                                '--binary=' + lib_path,
                                '--platform=android',
                                '--verbose']
                ret = subprocess.call(args_to_pass)
                if ret != 0:
                    # Lets fail just not to ignore something important
                    at_least_one_failed.value = True
            except Exception as e:  # pylint: disable=broad-except
                if args.verbose:
                    with print_lock:
                        print(type(e))
                        print(e)
            finally:
                queue.task_done()

    for lib_path in lib_paths:
        queue.put(lib_path)

    for _ in range(args.jobs):
        t = threading.Thread(target=_Worker)
        t.daemon = True
        t.start()

    queue.join()

    if at_least_one_failed.value:
        return 1
    return 0
def on_event(partition_context, event):
    if event:
        device_id = get_device_id_from_event(event)
        module_id = None  # TODO: extract module_id
        if get_message_source_from_event(event) == "twinChangeEvents":
            queue = self._client_list.get_incoming_patch_queue(
                device_id, module_id)
        else:
            queue = self._client_list.get_incoming_event_queue(
                device_id, module_id)
        if queue:
            logger.info("Received {} for device {}, module {}".format(
                get_message_source_from_event(event), device_id, module_id))
            queue.put(self._convert_incoming_event(event))
def test_queue_size(self):
    print(REDIS_HOST)
    print(os.getenv('REDIS_PORT_6379_TCP_ADDR'))
    queue = RedisQueue("test-queue-size-1", maxsize=1, host=REDIS_HOST)
    queue.put(1)
    with pytest.raises(six.moves.queue.Full):
        queue.put(1)

    queue = RedisQueue("test-queue-size-2", maxsize=2, host=REDIS_HOST)
    queue.put(1)
    queue.put(1)
    with pytest.raises(six.moves.queue.Full):
        queue.put(1)

    queue.get()
    queue.get()
    with pytest.raises(six.moves.queue.Empty):
        queue.get_nowait()
def test_datawriter_run_mock_process(self, Publish, compute, process):
    import six.moves.queue as queue
    import time
    queue = queue.Queue()
    self.writer.writer.queue = queue
    self.writer.writer.data.append('foo')
    self.writer.writer.messages.append('foo')
    # Add terminator to the queue
    queue.put(None)
    # Wait for the queue to be read
    time.sleep(1)
    self.assertTrue(compute.called)
    # data and message lists should be empty
    self.assertEqual(self.writer.writer.data, [])
    self.assertEqual(self.writer.writer.messages, [])
    queue.put('foo')
    time.sleep(1)
    self.assertTrue(process.called)
def _tee_process(pipe_out, control_pipe, real_out_fd, name, queue):
    """ A loop that reads a pipe, writes the content of the pipe into a given
    file descriptor and puts it into a queue.

    Stops when there is some data in the control pipe.
    """
    # Ignore SIGINT signal, the tee process will be killed by the main process
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    i = 0
    pipes = [pipe_out, control_pipe]
    draining = False
    while pipes:
        try:
            r, _, _ = select.select(pipes, [], [], 0.1)

            if r:
                # Activate draining mode
                if control_pipe in r:
                    draining = True
                    pipes.remove(control_pipe)
                    os.close(control_pipe)
                    LOGGER.debug("Start draining tee process for %r", name)
                    continue

                # Read data
                data = os.read(pipe_out, 1024)

                # Write the data in original fd
                os.write(real_out_fd, data)

                i += 1
                queue.put((name, data))
            else:
                if draining:
                    LOGGER.debug("Draining tee process for %r (%r)", name, r)
                    break
        except Exception:
            LOGGER.debug("LOOP ERROR", exc_info=True)

    LOGGER.debug("Tee process for %r finished", name)
def _request_wrapper(self, queue, url, params, timeout):
    """ Wrapper to requests used by each thread.

    Parameters
    ----------
    queue : Queue.Queue
        The Queue to write the response from the request in.
    url : str
        The URL to be queried.
    params : dict
        A dictionary of parameters to pass to the request.
    timeout : int
        Timeout to wait for a response to the request.
    """
    response = self.session.get(url, params=params, verify=self.verify,
                                timeout=timeout)
    queue.put(response)
def _fn(*args, **kwargs):
    for i in range(int(1e6)):
        assert not queue.empty(), \
            "trying to get() from an empty queue will deadlock"

        priority, next_trace = queue.get()
        try:
            ftr = poutine.trace(
                poutine.escape(poutine.replay(fn, next_trace),
                               functools.partial(sample_escape, next_trace)))
            return ftr(*args, **kwargs)
        except NonlocalExit as site_container:
            site_container.reset_stack()
            for tr in poutine.util.enum_extend(ftr.trace.copy(),
                                               site_container.site):
                # add a little bit of noise to the priority to break ties...
                queue.put((tr.log_prob_sum().item() -
                           torch.rand(1).item() * 1e-2, tr))

    raise ValueError("max tries ({}) exceeded".format(str(1e6)))
def _ifla_event_input(msg, queue):
    """This function serves as a callback for netlink socket. When socket
    receives a message, it passes it to callback function with optional
    extra argument (monitor's queue in this case)
    """
    hdr = libnl.nlmsg_hdr(msg)
    if libnl.EVENTS.get(hdr[0].nlmsg_type) == 'new_link':
        rta_attr = libnl.nlmsg_find_attr(hdr, libnl.size_of_genlmsghdr(),
                                         libnl.IFLA_EVENT)
        if rta_attr:
            queue.put(Event(
                EventType.DATA,
                {'IFLA_EVENT': libnl.IFLA_EVENT_MAP.get(
                    libnl.nla_get_u32(rta_attr))},
            ))
    return libnl.NlCbAction.NL_STOP
def put_message(queue, message, timeout=None):
    """Puts message into queue using the timeout specified.

    Args:
        queue (Queue): to add message to
        message (object): to write into the queue
        timeout (float): time to wait in seconds for outgoing message to be
            accepted into queue.

    Raises:
        ValueError: if queue provided is not valid or has no put method
        Queue.Full: if outgoing queue is full and timeout was provided and
            was reached before message was sent.
    """
    if queue is None or not hasattr(queue, "put"):
        raise ValueError("Invalid queue of {} provided".format(type(queue)))

    try:
        queue.put(message, block=True, timeout=timeout)
    except (IOError, socket.error):  # manager shutdown
        pass
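A minimal usage sketch for put_message above, assuming a bounded standard-library queue; the message contents and maxsize are illustrative, and queue.Full propagates to the caller because put_message only swallows IOError/socket.error.

# Hedged usage sketch: enqueue with a timeout and handle a full outgoing queue.
import queue as queue_module
import socket  # referenced by put_message's except clause in the snippet above

outgoing = queue_module.Queue(maxsize=1)
put_message(outgoing, {"type": "telemetry", "value": 1}, timeout=0.5)
try:
    # The queue is already full, so this put waits 0.1s and then raises Full.
    put_message(outgoing, {"type": "telemetry", "value": 2}, timeout=0.1)
except queue_module.Full:
    print("outgoing queue is full, dropping message")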
def _synchronize(obj, eng):
    queue = MyTimeoutQueue()
    # spawn a pool of threads, and pass them queue instance
    for i in range(len(args) - 1):
        t = MySpecialThread(queue)
        t.setDaemon(True)
        t.start()

    for func in args[0:-1]:
        if isinstance(func, list) or isinstance(func, tuple):
            new_eng = eng.duplicate()
            new_eng.setWorkflow(func)
            queue.put(lambda: new_eng.process([obj]))
        else:
            queue.put(lambda: func(obj, eng))

    # wait on the queue until everything has been processed
    queue.join_with_timeout(timeout)

    # run the last func
    args[-1](obj, eng)
def _populateQueue(stream, queue, kill_event):
    '''
    Collect lines from 'stream' and put them in 'queue'.
    '''
    while not kill_event.is_set():
        line = stream.readline()
        if line:
            queue.put(line)
            if print_output:
                # print only new line
                if print_new_line and line == self._lastline:
                    continue
                self._lastline = line
                LOGGING.debug("[%s]%s" % (self.name, repr(line.strip())))
        elif kill_event.is_set():
            break
        elif raise_EOF:
            raise UnexpectedEndOfStream
        else:
            # print("EndOfStream: %s" % self.name)
            break
def _object_input(obj, queue):
    """This function serves as a callback for nl_msg_parse(message, callback,
    extra_argument) function. When nl_msg_parse() is called, it passes message
    as an object to defined callback with optional extra argument
    (monitor's queue in our case)
    """
    obj_type = libnl.nl_object_get_type(obj)
    obj_dict = None
    if obj_type == libnl.RtnlObjectType.ADDR:
        obj_dict = _addr_info(obj)
    elif obj_type == libnl.RtnlObjectType.LINK:
        obj_dict = _link_info(obj)
    elif obj_type.split('/', 1)[0] == libnl.RtnlObjectType.BASE:
        obj_dict = _route_info(obj)

    if obj_dict is not None:
        msg_type = libnl.nl_object_get_msgtype(obj)
        try:
            obj_dict['event'] = libnl.EVENTS[msg_type]
        except KeyError:
            logging.error('unexpected msg_type %s', msg_type)
        else:
            queue.put(Event(EventType.DATA, obj_dict))
def run_worker_threads(threads, exception_types=()):
    '''Run the specified WorkerThreads.

    Start all threads, and wait for them in the specified order; make sure
    the EOF value is sent, if relevant.  Automatically log exceptions, except
    for exceptions in exception_types - be silent about such exceptions, only
    collect the exception info of first such exception.

    Return (no exceptions raised, first collected exception info).
    '''
    for t in threads:
        t.ignored_exception_types = exception_types
        t.start()

    ok = True
    exception = None
    for t in threads:
        # Terminate the input queues in case the producer threads crashed.
        # All queues should be large enough that we can (eventually) safely
        # add one more element; if "t" livelocks and never reads the queue,
        # we would block on t.join() anyway.
        logging.debug('Sending final EOFs to %s...', t.name)
        for (queue, eof) in t.input_queues:
            try:
                queue.put(eof)
            except WorkerQueueOrphanedError:
                pass
        logging.debug('Waiting for %s...', t.name)
        t.join()
        logging.debug('%s finished, exc_info: %s', t.name, repr(t.exc_info))
        if t.exc_info is not None:
            ok = False
            if isinstance(t.exc_info[1], exception_types) and exception is None:
                exception = t.exc_info
    return (ok, exception)
def _shove(self, args, dialect, queue):
    _ui = args[4]
    _ui.info('Shovel process started')
    csv.register_dialect('dataset_dialect', dialect)
    batch_generator = BatchGenerator(*args)
    try:
        for batch in batch_generator:
            _ui.debug('queueing batch {}'.format(batch.id))
            queue.put(batch)
        queue.put(SENTINEL)
    except csv.Error:
        queue.put(ERROR_SENTINEL)
        raise
    finally:
        queue.put(SENTINEL)
        if os.name == 'nt':
            _ui.close()
def test_queue_size(self): queue = RedisQueue("test-queue-size-1", maxsize=1) queue.put(1) self.assertRaises(six.moves.queue.Full, queue.put, 1) queue = RedisQueue("test-queue-size-2", maxsize=2) queue.put(1) queue.put(1) self.assertRaises(six.moves.queue.Full, queue.put, 1) queue.get() queue.get() self.assertRaises(six.moves.queue.Empty, queue.get_nowait)
def test_queue_size(self):
    print(REDIS_HOST)
    print(os.getenv('REDIS_PORT_6379_TCP_ADDR'))
    queue = RedisQueue("test-queue-size-1", maxsize=1, host=REDIS_HOST)
    queue.put(1)
    self.assertRaises(six.moves.queue.Full, queue.put, 1)

    queue = RedisQueue("test-queue-size-2", maxsize=2, host=REDIS_HOST)
    queue.put(1)
    queue.put(1)
    self.assertRaises(six.moves.queue.Full, queue.put, 1)

    queue.get()
    queue.get()
    self.assertRaises(six.moves.queue.Empty, queue.get_nowait)
def test_queue_len(self): queue = RedisQueue("test-queue-len", maxsize=100, host=REDIS_HOST) assert queue.length == 0 queue.put(1) assert queue.length == 1 queue.put(1) assert queue.length == 2 queue.put(1) assert queue.length == 3 queue.get_nowait() assert queue.length == 2 queue.get_nowait() assert queue.length == 1 queue.get_nowait() assert queue.length == 0
def test_queue_len(self): queue = RedisQueue("test-queue-len", maxsize=100) self.assertEqual(queue.length, 0) queue.put(1) self.assertEqual(queue.length, 1) queue.put(1) self.assertEqual(queue.length, 2) queue.put(1) self.assertEqual(queue.length, 3) queue.get_nowait() self.assertEqual(queue.length, 2) queue.get_nowait() self.assertEqual(queue.length, 1) queue.get_nowait() self.assertEqual(queue.length, 0)
def test_queue_len(self): queue = RedisQueue("test-queue-len", maxsize=100, host=REDIS_HOST) self.assertEqual(queue.length, 0) queue.put(1) self.assertEqual(queue.length, 1) queue.put(1) self.assertEqual(queue.length, 2) queue.put(1) self.assertEqual(queue.length, 3) queue.get_nowait() self.assertEqual(queue.length, 2) queue.get_nowait() self.assertEqual(queue.length, 1) queue.get_nowait() self.assertEqual(queue.length, 0)
def _enqueue_output(out, queue):
    """Enqueues lines from an output stream."""
    for line in iter(out.readline, b''):
        queue.put(line.decode('utf-8'))
    out.close()
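A usage sketch for _enqueue_output above, assuming Python 3; the subprocess command, the "lines" queue, and the polling loop are illustrative assumptions, not part of the original snippet.

# Hedged usage sketch: read a child process's stdout on a background thread
# so the main thread can poll the queue without blocking on readline().
import subprocess
import sys
import threading
from queue import Empty, Queue

proc = subprocess.Popen(
    [sys.executable, "-c", "print('hello'); print('world')"],
    stdout=subprocess.PIPE)
lines = Queue()
reader = threading.Thread(target=_enqueue_output, args=(proc.stdout, lines))
reader.daemon = True
reader.start()

# Poll for output while the child runs; a final drain after join() catches
# anything enqueued after the last poll.
while proc.poll() is None:
    try:
        sys.stdout.write(lines.get(timeout=0.1))
    except Empty:
        pass
reader.join()
while not lines.empty():
    sys.stdout.write(lines.get_nowait())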
def put(args): parser = option_parser("put FILE URL") parser.add_option("-c", "--chunksize", dest="chunksize", action="store", type="int", metavar="X", default=10, help="Set the chunk size for multipart uploads to X MB." "A value of 0 disables multipart uploads. The default is 10MB, the min is 5MB " "and the max is 1024MB. This parameter only applies for sites that support " "multipart uploads (see multipart_uploads configuration parameter). The maximum " "number of chunks is 10,000, so if you are uploading a large file, then the " "chunksize is automatically increased to enable the upload. Choose smaller values " "to reduce the impact of transient failures.") parser.add_option("-p", "--parallel", dest="parallel", action="store", type="int", metavar="N", default=4, help="Use N threads to upload FILE in parallel. " "The default value is 4, which enables parallel uploads with 4 threads. This " "parameter is only valid if the site supports mulipart uploads and the " "--chunksize parameter is not 0. Otherwise parallel uploads are disabled.") parser.add_option("-b", "--create-bucket", dest="create_bucket", action="store_true", default=False, help="Create the destination bucket if it does not already exist") parser.add_option("-f", "--force", dest="force", action="store_true", default=False, help="Overwrite key if it already exists") parser.add_option("-r", "--recursive", dest="recursive", action="store_true", default=False, help="Treat FILE as a directory") options, args = parser.parse_args(args) if options.chunksize!=0 and (options.chunksize < 5 or options.chunksize > 1024): parser.error("Invalid chunksize") if options.parallel <= 0: parser.error("Invalid value for --parallel") if len(args) != 2: parser.error("Specify FILE and URL") path = fix_file(args[0]) url = args[1] if not os.path.exists(path): raise Exception("No such file or directory: %s" % path) # We need the path to be absolute to make it easier to compute relative # paths in the recursive mode of operation path = os.path.abspath(path) # Get a list of all the files to transfer if options.recursive: if os.path.isfile(path): infiles = [path] # We can turn off the recursive option if it is a single file options.recursive = False elif os.path.isdir(path): def subtree(dirname): result = [] for name in os.listdir(dirname): path = os.path.join(dirname, name) if os.path.isfile(path): result.append(path) elif os.path.isdir(path): result.extend(subtree(path)) return result infiles = subtree(path) else: if os.path.isdir(path): raise Exception("%s is a directory. Try --recursive." % path) infiles = [path] # Validate URL uri = parse_uri(url) if uri.bucket is None: raise Exception("URL for put must have a bucket: %s" % url) if uri.key is None: uri.key = os.path.basename(path) config = get_config(options) max_object_size = config.getint(uri.site, "max_object_size") # Does the site support multipart uploads? 
multipart_uploads = config.getboolean(uri.site, "multipart_uploads") # Warn the user if options.parallel > 0: if not multipart_uploads: warn("Multipart uploads disabled, ignoring --parallel ") elif options.chunksize == 0: warn("--chunksize set to 0, ignoring --parallel") conn = get_connection(config, uri) # Create the bucket if the user requested it and it does not exist if options.create_bucket: if conn.lookup(uri.bucket): info("Bucket %s exists" % uri.bucket) else: info("Creating bucket %s" % uri.bucket) conn.create_bucket(uri.bucket, location=conn.location) b = Bucket(connection=conn, name=uri.bucket) info("Uploading %d files" % len(infiles)) start = time.time() totalsize = 0 for infile in infiles: keyname = get_key_for_path(path, infile, uri.key) info("Uploading %s to %s/%s" % (infile, uri.bucket, keyname)) # Make sure file is not too large for the service size = os.stat(infile).st_size if size > (max_object_size*GB): raise Exception("File %s exceeds object size limit" " (%sGB) of service" % (infile, max_object_size)) totalsize += size k = Key(bucket=b, name=keyname) # Make sure the key does not exist if not options.force and k.exists(): raise Exception("Key exists: '%s'. Try --force." % k.name) if (not multipart_uploads) or (options.chunksize==0): # no multipart, or chunks disabled, just do it the simple way k.set_contents_from_filename(infile) else: # Multipart supported, chunking requested # The target chunk size is user-defined, but we may need # to go larger if the file is big because the maximum number # of chunks is 10,000. So the actual size of a chunk # will range from 5MB to ~525MB if the maximum object size # is 5 TB. part_size = max(options.chunksize*MB, size/9999) num_parts = int(math.ceil(size / float(part_size))) if num_parts <= 1: # Serial k.set_contents_from_filename(infile) else: # Parallel # Request upload info("Creating multipart upload") upload = b.initiate_multipart_upload(k.name) try: # Create all uploads uploads = [] for i in range(0, num_parts): length = min(size-(i*part_size), part_size) up = PartialUpload(upload, i+1, num_parts, infile, i*part_size, length) uploads.append(up) if options.parallel <= 1: # Serial for up in uploads: up() else: # Parallel # Queue up requests queue = Queue.Queue() for up in uploads: queue.put(up) # No sense forking more threads than there are chunks nthreads = min(options.parallel, num_parts) # Fork threads threads = [] for i in range(0, nthreads): t = WorkThread(queue) threads.append(t) t.start() # Wait for the threads for t in threads: t.join() # If any of the threads encountered # an error, then we fail here if t.exception is not None: raise t.exception info("Completing upload") upload.complete_upload() except Exception as e: # If there is an error, then we need to try and abort # the multipart upload so that it doesn't hang around # forever on the server. try: info("Aborting multipart upload") upload.cancel_upload() except Exception as f: sys.stderr.write("ERROR: Unable to abort multipart" " upload (use lsup/rmup): %s\n" % f) raise e end = time.time() totalsize = totalsize / 1024.0 elapsed = end - start if elapsed > 0: rate = totalsize / elapsed else: rate = 0.0 info("Uploaded %d files of %0.1f KB in %0.6f seconds: %0.2f KB/s" % (len(infiles), totalsize, elapsed, rate))
def get(args): parser = option_parser("get URL [FILE]") parser.add_option("-c", "--chunksize", dest="chunksize", action="store", type="int", metavar="X", default=10, help="Set the chunk size for parallel downloads to X " "megabytes. A value of 0 will avoid chunked reads. This option only applies for " "sites that support ranged downloads (see ranged_downloads configuration " "parameter). The default chunk size is 10MB, the min is 1MB and the max is " "1024MB. Choose smaller values to reduce the impact of transient failures.") parser.add_option("-p", "--parallel", dest="parallel", action="store", type="int", metavar="N", default=4, help="Use N threads to upload FILE in parallel. The " "default value is 4, which enables parallel downloads with 4 threads. " "This parameter is only valid if the site supports ranged downloads " "and the --chunksize parameter is not 0. Otherwise parallel downloads are " "disabled.") parser.add_option("-r", "--recursive", dest="recursive", action="store_true", help="Get all keys that start with URL") options, args = parser.parse_args(args) if options.chunksize < 0 or options.chunksize > 1024: parser.error("Invalid chunksize") if options.parallel <= 0: parser.error("Invalid value for --parallel") if len(args) == 0: parser.error("Specify URL") uri = parse_uri(args[0]) if uri.bucket is None: raise Exception("URL must contain a bucket: %s" % args[0]) if uri.key is None and not options.recursive: raise Exception("URL must contain a key or use --recursive") if len(args) > 1: output = fix_file(args[1]) elif uri.key is None: output = "./" else: output = os.path.basename(uri.key.rstrip("/")) info("Downloading %s" % uri) # Does the site support ranged downloads properly? config = get_config(options) ranged_downloads = config.getboolean(uri.site, "ranged_downloads") # Warn the user if options.parallel > 1: if not ranged_downloads: warn("ranged downloads not supported, ignoring --parallel") elif options.chunksize == 0: warn("--chunksize set to 0, ignoring --parallel") conn = get_connection(config, uri) b = Bucket(connection=conn, name=uri.bucket) if options.recursive: # Get all the keys we need to download def keyfilter(k): if uri.key is None: # We want all the keys in the bucket return True if uri.key.endswith("/"): # The user specified a "folder", so we should only match keys # in that "folder" return k.name.startswith(uri.key) if k.name == uri.key: # Match bare keys in case they specify recursive, but there # is a key that matches the specified path. Note that this # could cause a problem in the case where they have a key # called 'foo' and a "folder" called 'foo' in the same # bucket. In a file system that can't happen, but it can # happen in S3. return True if k.name.startswith(uri.key+"/"): # All other keys in the "folder" return True return False keys = [x for x in b.list(uri.key) if keyfilter(x)] else: # Just get the one key we need to download key = b.get_key(uri.key) if key is None: raise Exception("No such key. If %s is a folder, try --recursive." 
% uri.key) keys = [key] info("Downloading %d keys" % len(keys)) start = time.time() totalsize = 0 for key in keys: outfile = get_path_for_key(b.name, uri.key, key.name, output) info("Downloading %s/%s to %s" % (uri.bucket, key.name, outfile)) outfile = os.path.abspath(outfile) # This means that the key is a "folder", so we just need to create # a directory for it if key.name.endswith("/") and key.size == 0: if not os.path.isdir(outfile): os.makedirs(outfile) continue if os.path.isdir(outfile): raise Exception("%s is a directory" % outfile) outdir = os.path.dirname(outfile) if not os.path.isdir(outdir): os.makedirs(outdir) # We need this for the performance report totalsize += key.size if (not ranged_downloads) or (options.chunksize == 0): # Ranged downloads not supported, or chunking disabled key.get_contents_to_filename(outfile) else: # Ranged downloads and chunking requested # Compute chunks part_size = options.chunksize*MB num_parts = int(math.ceil(key.size / float(part_size))) if num_parts <= 1: # No point if there is only one chunk key.get_contents_to_filename(outfile) else: # Create the file and set it to the appropriate size. f = open(outfile, "w+b") f.seek(key.size-1) f.write('\0') f.close() # Create all the downloads downloads = [] for i in range(0, num_parts): dstart = i*part_size dend = min(key.size, dstart+part_size-1) down = PartialDownload(b, key.name, outfile, i+1, num_parts, dstart, dend) downloads.append(down) if options.parallel <= 1: # Serial for down in downloads: down() else: # Parallel # No sense forking more threads than there are chunks nthreads = min(options.parallel, num_parts) info("Starting parallel download with %d threads" % nthreads) # Queue up requests queue = Queue.Queue() for down in downloads: queue.put(down) # Fork threads threads = [] for i in range(0, nthreads): t = WorkThread(queue) threads.append(t) t.start() # Wait for the threads for t in threads: t.join() # If any of the threads encountered # an error, then we fail here if t.exception is not None: raise t.exception end = time.time() totalsize = totalsize / 1024.0 elapsed = end - start if elapsed > 0: rate = totalsize / elapsed else: rate = 0.0 info("Downloaded %d keys of %0.1f KB in %0.6f seconds: %0.2f KB/s" % (len(keys), totalsize, elapsed, rate))
def wait_for_exit_code(client, queue):
    queue.put(client.wait_for_command_exit_code())
def triggerSaveTimer(queue):
    Timer(MIN_SAVE_INTERVAL_SEC, triggerSaveTimer, [queue]).start()
    queue.put(dict(saveTrigger=None))