from Queue import Queue
from threading import Thread
import itertools


def main(options, args):
    if len(args) != 1:
        print "No filename or path to upload supplied:"
        parser.print_help()
        exit(1)

    flickr = authenticate_to_flickr()

    # Upload a single file
    if not options.recursive:
        if already_uploaded(flickr, args[0]):
            print "Already uploaded, skipping"
            exit(1)
        else:
            print "Uploading"
            upload_file(flickr, args[0])
        exit(0)

    # Use worker threads to upload a directory tree full of files
    queue = Queue()
    queue.done = False
    queue.count = itertools.count()

    def do_work():
        upload_from_queue(flickr, queue)

    # Spawn workers
    workers = [Thread(target=do_work) for _ in range(options.workers)]
    for worker in workers:
        worker.start()

    # Use a watcher thread to print out status about the queue
    def watch():
        print_status(queue)
    watcher = Thread(target=watch)
    watcher.daemon = True
    watcher.start()

    # Enqueue work for the workers
    for path in files_to_upload(flickr, args[0]):
        queue.put(path)
    queue.done = True

    # Wait for everything to finish
    queue.join()
    print "\n\n DONE"
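
# The worker body upload_from_queue is not shown above; the following is a
# minimal, hypothetical sketch of what it could look like, assuming each
# worker blocks on queue.get() with a short timeout, uploads the path,
# advances the shared queue.count iterator, and calls task_done() so that
# queue.join() in main() can return. The Empty exception combined with the
# queue.done flag is how a worker would know when to stop.
from Queue import Empty

def upload_from_queue(flickr, queue):
    while True:
        try:
            # Time out regularly so the loop can notice queue.done
            path = queue.get(timeout=1)
        except Empty:
            if queue.done:
                return  # producer finished and queue drained: stop this worker
            continue
        try:
            upload_file(flickr, path)
            next(queue.count)  # lets print_status() report progress
        finally:
            queue.task_done()  # required for queue.join() to unblock
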
import time
import logging
from Queue import Queue

log = logging.getLogger(__name__)


def distribute_tasks(tasks, action, nthreads=4, queue_size=10,
                     retry_exceptions=False, batch_size=None, output_action=None):
    """
    Distribute the elements in tasks over nthreads threads using a queue.

    The threads will call action(task) on each element in tasks. If
    action(task) raises an exception, the element is placed on the problem
    list. If retry_exceptions is non-False, the problematic elements are
    retried after all elements are done; otherwise, the list of problems is
    returned.

    If batch_size is not None, tasks are cut into batches of that size and
    the sub-sequences are placed on the queue.

    If output_action is given, it is called from the worker thread with the
    result of each action.
    """
    starttime = time.time()
    count = 0
    queue = Queue(queue_size)
    problems = []

    log.debug("Creating and starting {nthreads} threads".format(**locals()))
    for i in range(nthreads):
        QueueProcessorThread(action, queue, problems, output_action,
                             name="Worker_%i" % i).start()

    log.debug("Placing tasks on queue")
    if batch_size:
        for subset in toolkit.splitlist(tasks, batch_size):
            count += len(subset)
            queue.put(subset)
    else:
        for task in tasks:
            queue.put(task)
            count += 1

    log.debug("Waiting until queue is empty")
    queue.join()

    while problems and retry_exceptions:
        log.debug('Retrying {n} problematic tasks'.format(n=len(problems)))
        # Use a temporary list to hold the problems and clear the shared
        # problems list before retrying
        _problems = problems[:]
        del problems[:]
        for problem in _problems:
            queue.put(problem)
        queue.join()
        # An integer retry_exceptions acts as a maximum number of retry rounds
        if type(retry_exceptions) == int:
            retry_exceptions -= 1

    queue.done = True
    total_time = time.time() - starttime
    rate = count / (total_time + .00001)
    log.debug('Processed {count} tasks in {total_time:.0f} seconds '
              '({rate:.2f}/second)'.format(**locals()))
    return problems
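
# A hypothetical usage sketch (fetch_url, store_result and the url list are
# illustrative assumptions, not part of the original module): fetch a list of
# urls over eight worker threads, retry failing urls for up to two more
# rounds, and hand each successful result to store_result from the worker
# thread via output_action.
import urllib2

def fetch_url(url):
    # The per-task action; any exception lands the url on the problem list
    return urllib2.urlopen(url).read()

def store_result(html):
    # output_action: called from the worker thread with each action's result
    log.info("fetched %d bytes", len(html))

urls = ["http://example.com/page/%d" % i for i in range(100)]
problems = distribute_tasks(urls, fetch_url, nthreads=8,
                            retry_exceptions=2, output_action=store_result)
if problems:
    log.warning("%d urls could not be fetched", len(problems))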