def main(argv):
    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    log = logging.getLogger(os.path.basename(sys.argv[0]))

    # Mirror log records to stdout using a detailed pipe-delimited format.
    FORMAT = '%(asctime)s|%(levelname)s|%(process)d|%(module)s.py|%(funcName)s|%(lineno)d| %(message)s'
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(FORMAT, datefmt="%Y-%m-%d %H:%M:%S"))
    log.addHandler(handler)

    file_object_cache = FileObjectCache()
    start = time.time()

    # Bounded queue of work items plus a queue of per-file statistics.
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()

    # A single walker process feeds put_queue; only the filesystem walker is wired up here,
    # and the source operand is a hardcoded test path.
    walk = {'filesystem': walk_filesystem}[options.walk]
    args = ['/auto/fina-data/share/FARepository/prod/CIGActgS11/position/processing/Priority_2/PositionSide/122654_DESK_CDRG183872PositionSide.bcp.SSrvr']
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()

    # A pool of putter processes drains put_queue and reports into stat_queue.
    put = {'update': put_update}[options.put]
    putter_processes = list(islice(
        repeatedly(Process, target=putter, args=(put, put_queue, stat_queue, options)),
        options.processes))
    for putter_process in putter_processes:
        putter_process.start()
    walker_process.join()

    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()

    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
def insert_files(self, out, cfg, producer, return_dict, skip_header=0, rec_delim=os.linesep):
    self.opt.skip_header = skip_header
    self.opt.rec_delim = rec_delim
    log = logging.getLogger('cli')
    self.scfg, self.tcfg = cfg
    file_object_cache = FileObjectCache()
    start = time.time()

    stat_queue = JoinableQueue()
    put_queue = JoinableQueue(1024 * self.opt.processes)

    # Start the putter pool before producing so consumers drain the queue as it fills.
    put = {'update': self.put_update}[self.opt.put]
    putter_processes = list(islice(
        repeatedly(Process, target=self.putter, args=(put, put_queue, stat_queue, return_dict)),
        self.opt.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=self.statter, args=(stat_queue, start))
    statter_process.start()

    # producer is a (callable, args) pair that yields the work items.
    out_names = []
    for file in producer[0](*producer[1]):
        out_names.append(file)
        put_queue.put(file)
    out.dump_files = out_names

    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()

    log.debug('counter=%s total_ins=%s return_dict=%s',
              counter.value(), self.total_ins, return_dict.values())
def insert_files(self, file_names, out, cfg, skip_header=0, rec_delim=os.linesep):
    self.opt.skip_header = skip_header
    self.opt.rec_delim = rec_delim
    log = logging.getLogger('cli')
    self.scfg, self.tcfg = cfg
    file_object_cache = FileObjectCache()
    start = time.time()

    put_queue = JoinableQueue(1024 * self.opt.processes)
    stat_queue = JoinableQueue()

    # Enqueue all work up front; each entry of file_names.file_names is a tuple whose
    # first element is the file path.
    for file in file_names.file_names:
        put_queue.put(file)

    put = {'update': self.put_update}[self.opt.put]
    putter_processes = list(islice(
        repeatedly(Process, target=self.putter, args=(put, put_queue, stat_queue)),
        self.opt.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=self.statter, args=(stat_queue, start))
    statter_process.start()

    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()

    # Record the gzipped output names, keys, and location derived from the input paths.
    out.file_names = ['%s.gz' % os.path.basename(x[0]) for x in file_names.file_names]
    out.file_keys = ['%s.gz' % x[0] for x in file_names.file_names]
    out.file_location = os.path.dirname(file_names.file_names[0][0])
def parallel_for(a, cls, args=[], kwargs={}, num_processes=None):
    from multiprocessing import Process, JoinableQueue, cpu_count, Pipe
    if num_processes is None:
        num_processes = cpu_count()
    # Note that JoinableQueue uses an integer for tracking locations in the queue.
    # Because it's using shared memory it's not terribly flexible and gives annoyingly
    # unclear errors if you go over the limit. We'd like the queue to be as large as
    # possible so that we can avoid contention, but without allocating a max possible
    # size queue unless we need it, thus the calculation below. 32767 is a hard limit.
    q = JoinableQueue(maxsize=min(len(a) + num_processes, 2**15 - 1))
    output_pipes = [Pipe(duplex=False) for _ in range(num_processes)]
    send_pipes = [p for _, p in output_pipes]
    recv_pipes = [p for p, _ in output_pipes]
    pool = [Process(target=_parallel_for, args=(q, cls, pipe) + tuple(args), kwargs=kwargs)
            for pipe in send_pipes]
    output_watcher = MultiPipeWatcher(recv_pipes)
    try:
        for p in pool:
            p.start()
        output_watcher.start()
        for x in a:
            q.put(x)
        for _ in range(num_processes):
            q.put(None)  # End markers
        q.close()
        q.join_thread()
        q.join()
        for p in pool:
            p.join()
        output_watcher.flush()
        output_watcher.join()
        combined_output = output_watcher.merged
        return combined_output
    except KeyboardInterrupt:
        print "Interrupted -- terminating worker processes"
        for p in pool:
            p.terminate()
        for p in pool:
            p.join()
        raise
def __run_chm_test_procs(mems, model, regions, ntasks, nthreads):
    """Starts ntasks processes running __run_chm_test_proc then calls __run_chm_test_parallel."""
    from multiprocessing import JoinableQueue, Process
    from time import sleep
    print("Running CHM test with %d task%s and %d thread%s per task" %
          (ntasks, 's' if ntasks > 1 else '', nthreads, 's' if nthreads > 1 else ''))
    nthreads_full = ntasks * nthreads

    # Start the child processes
    q = JoinableQueue()
    args = (mems, model, nthreads, q)
    processes = [Process(target=__run_chm_test_proc, name="CHM-test-%d" % p, args=args)
                 for p in xrange(ntasks)]
    for p in processes:
        p.daemon = True
        p.start()
    sleep(0)

    # Run the CHM-test in parallel
    try:
        out = __run_chm_test_parallel(mems, model, regions, q, processes, nthreads_full)
    except:
        __clear_queue(q)
        __kill_processes(processes)
        raise

    # Tell all processes we are done and make sure they all actually terminate
    for _ in xrange(ntasks):
        q.put_nowait(None)
    q.close()
    q.join()
    q.join_thread()
    for p in processes:
        p.join()

    # Done! Return the output image
    return out
def parallel_for(a, cls, args=[], kwargs={}, num_processes=None):
    from multiprocessing import Process, JoinableQueue, cpu_count, Pipe
    if num_processes is None:
        num_processes = cpu_count()
    # Note that JoinableQueue uses an integer for tracking locations in the queue.
    # Because it's using shared memory it's not terribly flexible and gives annoyingly
    # unclear errors if you go over the limit. We'd like the queue to be as large as
    # possible so that we can avoid contention, but without allocating a max possible
    # size queue unless we need it, thus the calculation below. 32767 is a hard limit.
    q = JoinableQueue(maxsize=min(len(a) + num_processes, 2**15 - 1))
    output_pipes = [Pipe(duplex=False) for _ in range(num_processes)]
    send_pipes = [p for _, p in output_pipes]
    recv_pipes = [p for p, _ in output_pipes]
    pool = [Process(target=_parallel_for, args=(q, cls, pipe) + tuple(args), kwargs=kwargs)
            for pipe in send_pipes]
    output_watcher = MultiPipeWatcher(recv_pipes)
    try:
        for p in pool:
            p.start()
        output_watcher.start()
        for x in a:
            q.put(x)
        for _ in range(num_processes):
            q.put(None)  # End markers
        q.close()
        q.join_thread()
        q.join()
        for p in pool:
            p.join()
        output_watcher.flush()
        output_watcher.join()
        combined_output = output_watcher.merged
        return combined_output
    except KeyboardInterrupt:
        print("Interrupted -- terminating worker processes")
        for p in pool:
            p.terminate()
        for p in pool:
            p.join()
        raise
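# The two parallel_for variants above rely on a worker entry point (_parallel_for) and a
# MultiPipeWatcher that are not shown in this listing. The sketch below is a minimal,
# hypothetical illustration of that worker contract, not the original implementation:
# each worker drains the JoinableQueue until it sees a None end marker, acknowledges
# every item with task_done() so the parent's q.join() can return, and sends its results
# back over the write end of its duplex=False Pipe before closing it.
def _parallel_for_sketch(q, cls, pipe, *args, **kwargs):
    worker = cls(*args, **kwargs)          # assumed: cls instances expose a process(item) method
    results = []
    while True:
        item = q.get()
        try:
            if item is None:               # end marker, queued once per worker
                break
            results.append(worker.process(item))
        finally:
            q.task_done()                  # balance every get(), sentinel included
    pipe.send(results)                     # the parent's receiving end merges these lists
    pipe.close()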
def parexec(signal, out, num_consumers, iterator):
    t = time.time()
    tasks = JoinableQueue()
    results = Queue()

    print 'starting consumers'
    consumers = [Consumer(tasks, results, [signal]) for _ in range(num_consumers)]
    for w in consumers:
        w.start()

    print 'adding tasks'
    for i in iterator:
        tasks.put(Task(i, signal))
    # One None sentinel per consumer so every worker sees a shutdown marker.
    for i in range(num_consumers):
        tasks.put(None)

    print 'collecting'
    for n in range(len(iterator)):
        out.append(results.get())
        if n % 100000 == 0:
            print n
    tasks.close()
    tasks.join_thread()

    print 'closing'
    for w in consumers:
        w.join()
    print time.time() - t
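# parexec above assumes Consumer and Task classes that are not shown in this listing.
# The pair below is a hypothetical, minimal sketch compatible with its calling
# convention: Consumer is a Process subclass that pulls Task objects off the
# JoinableQueue, stops on the None sentinel, and pushes each call result onto the
# results Queue. The per-item work in Task.__call__ is a stand-in.
from multiprocessing import Process


class Task(object):
    def __init__(self, item, signal):
        self.item = item
        self.signal = signal

    def __call__(self):
        return self.item, self.signal      # stand-in for the real per-item work


class Consumer(Process):
    def __init__(self, tasks, results, extra_args):
        Process.__init__(self)
        self.tasks = tasks
        self.results = results
        self.extra_args = extra_args

    def run(self):
        while True:
            task = self.tasks.get()
            try:
                if task is None:           # sentinel: one is queued per consumer
                    break
                self.results.put(task())
            finally:
                self.tasks.task_done()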
def main(argv):
    parser = OptionParser()

    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option('--insecure', action='store_false', dest='secure', help='use insecure connection')
    group.add_option('--secure', action='store_true', default=True, dest='secure', help='use secure connection')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk', choices=('filesystem', 'tar'), default='filesystem', metavar='MODE',
                     help='set walk mode (filesystem or tar)')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Put options')
    group.add_option('--content-type', metavar='CONTENT-TYPE', help='set content type')
    group.add_option('--gzip', action='store_true', help='gzip values and set content encoding')
    group.add_option('--put', choices=('add', 'stupid', 'update'), default='update', metavar='MODE',
                     help='set put mode (add, stupid, or update)')
    group.add_option('--prefix', default='', metavar='PREFIX', help='set key prefix')
    group.add_option('--resume', action='append', default=[], metavar='FILENAME', help='resume from log file')
    group.add_option('--grant', metavar='GRANT', default=None, choices=CannedACLStrings,
                     help='A canned ACL policy to be applied to each file uploaded.\nChoices: %s' % ', '.join(CannedACLStrings))
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename', metavar='FILENAME', help='set log filename')
    group.add_option('--quiet', '-q', action='count', default=0, help='less output')
    group.add_option('--verbose', '-v', action='count', default=0, help='more output')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run', action='store_true', help='don\'t write to S3')
    group.add_option('--limit', metavar='N', type=int, help='set maximum number of keys to put')
    group.add_option('--processes', default=8, metavar='PROCESSES', type=int, help='set number of putter processes')
    parser.add_option_group(group)

    options, args = parser.parse_args(argv[1:])

    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))

    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1

    # Check bucket access up front, then discard the connection.
    connection = S3Connection(is_secure=options.secure)
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection

    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()

    walk = {'filesystem': walk_filesystem, 'tar': walk_tar}[options.walk]
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()

    put = {'add': put_add, 'stupid': put_stupid, 'update': put_update}[options.put]
    putter_processes = list(islice(
        repeatedly(Process, target=putter, args=(put, put_queue, stat_queue, options)),
        options.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()

    walker_process.join()
    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
def main(argv):
    parser = OptionParser()

    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option('--host', default='s3.amazonaws.com', help='set AWS host name')
    group.add_option('--insecure', action='store_false', dest='secure', help='use insecure connection')
    group.add_option('--secure', action='store_true', default=True, dest='secure', help='use secure connection')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk', choices=('filesystem', 'tar'), default='filesystem', metavar='MODE',
                     help='set walk mode (filesystem or tar)')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Put options')
    group.add_option('--content-type', metavar='CONTENT-TYPE',
                     help='set content type, set to "guess" to guess based on file name')
    group.add_option('--gzip', action='store_true', help='gzip values and set content encoding')
    group.add_option('--put', choices=('add', 'stupid', 'update'), default='update', metavar='MODE',
                     help='set put mode (add, stupid, or update)')
    group.add_option('--prefix', default='', metavar='PREFIX', help='set key prefix')
    group.add_option('--resume', action='append', default=[], metavar='FILENAME', help='resume from log file')
    group.add_option('--grant', metavar='GRANT', default=None, choices=CannedACLStrings,
                     help='A canned ACL policy to be applied to each file uploaded.\nChoices: %s' % ', '.join(CannedACLStrings))
    group.add_option('--header', metavar='HEADER:VALUE', dest='headers', action='append',
                     help='extra headers to add to the file, can be specified multiple times')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename', metavar='FILENAME', help='set log filename')
    group.add_option('--quiet', '-q', action='count', default=0, help='less output')
    group.add_option('--verbose', '-v', action='count', default=0, help='more output')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run', action='store_true', help='don\'t write to S3')
    group.add_option('--limit', metavar='N', type=int, help='set maximum number of keys to put')
    group.add_option('--processes', default=8, metavar='PROCESSES', type=int, help='set number of putter processes')
    parser.add_option_group(group)

    options, args = parser.parse_args(argv[1:])

    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))

    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1

    # Check bucket access up front, then discard the connection.
    connection = S3Connection(is_secure=options.secure)
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection

    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()

    walk = {'filesystem': walk_filesystem, 'tar': walk_tar}[options.walk]
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()

    put = {'add': put_add, 'stupid': put_stupid, 'update': put_update}[options.put]
    putter_processes = list(islice(
        repeatedly(Process, target=putter, args=(put, put_queue, stat_queue, options)),
        options.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()

    walker_process.join()
    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
def main(argv=None):
    if argv is None:
        argv = sys.argv
    parser = OptionParser()

    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option('--bucket_region', default='us-east-1',
                     help='set bucket region if not in us-east-1 (default new bucket region)')
    group.add_option('--host', default='s3.amazonaws.com', help='set AWS host name')
    group.add_option('--insecure', action='store_false', dest='secure', help='use insecure connection')
    group.add_option('--secure', action='store_true', default=True, dest='secure', help='use secure connection')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk', choices=('filesystem', 'tar', 's3'), default='filesystem', metavar='MODE',
                     help='set walk mode (filesystem or tar)')
    group.add_option('--exclude', action='append', default=[], metavar='PATTERN',
                     help='exclude files matching PATTERN')
    group.add_option('--include', action='append', default=[], metavar='PATTERN',
                     help='don\'t exclude files matching PATTERN')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Put options')
    group.add_option('--content-type', default='guess', metavar='CONTENT-TYPE',
                     help='set content type, set to "guess" to guess based on file name '
                          'or "magic" to guess by filename and libmagic.')
    group.add_option('--gzip', action='store_true', help='gzip values and set content encoding')
    group.add_option('--gzip-type', action='append', default=[],
                     help='if --gzip is set, sets what content-type to gzip, defaults '
                          'to a list of known text content types, "all" will gzip everything.'
                          ' Specify multiple times for multiple content types. '
                          '[default: "guess"]')
    group.add_option('--put', choices=('add', 'stupid', 'update', 'copy'), default='update', metavar='MODE',
                     help='set put mode (add, stupid, copy or update)')
    group.add_option('--prefix', default='', metavar='PREFIX', help='set key prefix')
    group.add_option('--resume', action='append', default=[], metavar='FILENAME', help='resume from log file')
    group.add_option('--grant', metavar='GRANT', default=None, choices=CannedACLStrings,
                     help='A canned ACL policy to be applied to each file uploaded.\nChoices: %s' % ', '.join(CannedACLStrings))
    group.add_option('--header', metavar='HEADER:VALUE', dest='headers', action='append',
                     help='extra headers to add to the file, can be specified multiple times')
    group.add_option('--encrypt-key', action='store_true', default=False, dest='encrypt_key',
                     help='use server side encryption')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename', metavar='FILENAME', help='set log filename')
    group.add_option('--quiet', '-q', action='count', default=0, help='less output')
    group.add_option('--verbose', '-v', action='count', default=0, help='more output')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run', action='store_true', help='don\'t write to S3')
    group.add_option('--limit', metavar='N', type=int, help='set maximum number of keys to put')
    group.add_option('--processes', default=8, metavar='PROCESSES', type=int, help='set number of putter processes')
    parser.add_option_group(group)

    options, args = parser.parse_args(argv[1:])

    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))

    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1
    if not options.bucket_region:
        options.bucket_region = 'us-east-1'

    # Check bucket access up front, then discard the connection.
    connection = boto.s3.connect_to_region(
        options.bucket_region,
        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
        is_secure=True,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    # Disable HTTPS certificate verification on Pythons that verify by default.
    import ssl
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection

    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()

    walk = {'filesystem': walk_filesystem, 'tar': walk_tar, 's3': walk_s3}[options.walk]
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()

    put = {'add': put_add, 'stupid': put_stupid, 'update': put_update, 'copy': put_copy}[options.put]
    putter_processes = list(islice(
        repeatedly(Process, target=putter, args=(put, put_queue, stat_queue, options)),
        options.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()

    walker_process.join()
    # Shut down: one None sentinel per putter, then drain and join both queues.
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
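# All of the main()/insert_files() variants above follow the same JoinableQueue lifecycle:
# a producer feeds put_queue, N worker processes drain it, one None sentinel per worker
# signals shutdown, and close()/join_thread() flush the queue's feeder thread before
# exiting. The self-contained sketch below is a minimal illustration of that shutdown
# protocol with a stand-in worker (not the S3 putter from the snippets); the names
# _worker and run are hypothetical.
from itertools import islice
from multiprocessing import JoinableQueue, Process


def _worker(put_queue, stat_queue):
    # Drain work items until the None sentinel arrives, acknowledging each one.
    while True:
        item = put_queue.get()
        try:
            if item is None:
                break
            stat_queue.put(len(str(item)))   # stand-in for the real per-item work
        finally:
            put_queue.task_done()


def run(items, processes=4):
    put_queue = JoinableQueue(1024 * processes)
    stat_queue = JoinableQueue()
    workers = [Process(target=_worker, args=(put_queue, stat_queue)) for _ in range(processes)]
    for w in workers:
        w.start()
    for item in items:
        put_queue.put(item)
    for _ in workers:                        # one sentinel per worker
        put_queue.put(None)
    put_queue.close()
    # Drain the stats before joining the workers so no child blocks on unread queue data.
    stats = [stat_queue.get() for _ in items]
    for w in workers:
        w.join()
    put_queue.join_thread()
    return stats


if __name__ == '__main__':
    print(run(['a', 'bb', 'ccc']))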