def run(self, *args):
    """ kickoff the program """
    self.add_args()
    if len(args) > 0:
        program_args = self.parser.parse_args(args)
    else:
        program_args = self.parser.parse_args()

    # setup the mongoDB connection
    mongo_connection = GritsMongoConnection(program_args)

    # confirm the user wants to apply the indexes
    confirm = True
    if not program_args.force:
        confirm = self.query_yes_no(
            "This will lock the database. Are you sure?", "no")

    if confirm:
        # ensure that the indexes are applied to the collections
        pool = ThreadPool(nodes=1)
        results = pool.amap(mongo_connection.ensure_indexes, [None])
        while not results.ready():
            # command-line spinner
            for cursor in '|/-\\':
                sys.stdout.write('\b%s' % cursor)
                sys.stdout.flush()
                time.sleep(.25)
        sys.stdout.write('\b')
        sys.stdout.flush()
        # async-poll is done, get the results
        result = results.get()
        logging.info(result)
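# A minimal, self-contained sketch of the amap-plus-spinner idiom used above
# (and again below); `slow_task` is an illustrative stand-in for a long job
# such as ensure_indexes, not part of the project.
import sys
import time
from pathos.threading import ThreadPool

def slow_task(_):
    time.sleep(2)                          # pretend to do slow work
    return 'indexes applied'

pool = ThreadPool(nodes=1)
results = pool.amap(slow_task, [None])     # returns immediately
while not results.ready():                 # poll, drawing a spinner in place
    for cursor in '|/-\\':
        sys.stdout.write('\b%s' % cursor)
        sys.stdout.flush()
        time.sleep(.25)
sys.stdout.write('\b')
print(results.get())                       # -> ['indexes applied']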
def GMM_Ineq_parall(Theta0, DATA_STRUCT, d_struct):
    Theta = {
        "comm_mu": Theta0[0],
        "priv_mu": Theta0[1],
        "epsilon_mu": Theta0[2],
        "comm_var": Theta0[3],
        "priv_var": Theta0[4],
        "epsilon_var": Theta0[5],
    }

    rng = np.random.RandomState(d_struct['rng_seed'])

    start = time.time()
    print('--------------------------------------------------------')
    print('current parameter set is:')
    print(Theta)

    '''
    parallel programming on two levels: the data are split across
    pool nodes, and each node runs its share of the estimation
    '''
    data_n = len(DATA_STRUCT)
    work_pool = ThreadPool(nodes=data_n)

    cpu_num = multiprocessing.cpu_count()
    cpu_num_node = int((cpu_num - 1) / data_n)

    # submit with amap so that all parts of the data run concurrently
    results = work_pool.amap(
        partial(para_data_allo_1, Theta, cpu_num_node, rng, d_struct),
        iter(DATA_STRUCT))
    work_pool.close()
    while not results.ready():
        time.sleep(5)
        print(".")
    # work_pool.join()

    auction_result = np.nanmean(list(results.get()))

    end = time.time()
    print("objective value: " + str(auction_result))
    print("time spent in this loop:")
    print(end - start)
    print('--------------------------------------------------------\n')

    # save the parameters, the objective value, and the loop time (in minutes)
    with open('para.txt', 'a+') as f:
        for item in Theta0:
            f.write("%f\t" % item)
        f.write("%f\t" % auction_result)
        # parenthesize the division: "%f\n" % (end - start) / 60 would try
        # to divide the formatted string by 60 and raise a TypeError
        f.write("%f\n" % ((end - start) / 60))

    return auction_result
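# Since GMM_Ineq_parall returns a scalar objective, it can be handed to a
# derivative-free optimizer. A hypothetical driver sketch: the starting values
# and the choice of scipy's Nelder-Mead are assumptions, and DATA_STRUCT /
# d_struct are taken to be already constructed.
from scipy.optimize import minimize

theta_start = [0.5, 0.5, 0.5, 1.0, 1.0, 1.0]   # illustrative means/variances

res = minimize(GMM_Ineq_parall, theta_start,
               args=(DATA_STRUCT, d_struct),
               method='Nelder-Mead')           # gradient-free suits a noisy,
                                               # simulated objective
print(res.x, res.fun)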
def process(self, mongo_connection):
    """ process a chunk of rows in the file """
    reader = UnicodeReader(self.program_arguments.infile,
                           dialect=self.provider_type.dialect)
    self.find_header(reader)

    for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
        # collections of valid and invalid records to be batch upsert / insert many
        valid_records = []
        invalid_records = []

        # is threading enabled? this may increase performance when mongoDB
        # is not running on localhost, due to the busy wait on finding an
        # airport in the case of FlightGlobalType.
        if settings._THREADING_ENABLED:
            pool = ThreadPool(nodes=settings._NODES)
            results = pool.amap(self.process_row, chunk)
            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)
            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            valid_records = [x[0] for x in result if x[0] is not None]
            invalid_records = [x[1] for x in result if x[1] is not None]
        else:
            # single-threaded synchronous processing
            for data in chunk:
                valid, invalid = self.process_row(data)
                if valid is not None:
                    valid_records.append(valid)
                if invalid is not None:
                    invalid_records.append(invalid)

        # bulk upsert / insert many of the records
        valid_result = mongo_connection.bulk_upsert(
            self.provider_type.collection_name, valid_records)
        invalid_result = mongo_connection.insert_many(
            settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
        logging.debug('valid_result: %r', valid_result)
        logging.debug('invalid_result: %r', invalid_result)
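# process() relies on process_row returning a (valid, invalid) pair in which
# exactly one side is non-None. A minimal sketch of that convention and of the
# comprehension-based split; every name here is illustrative, not the
# project's actual API.
def process_row(data):
    try:
        return {'value': int(data)}, None              # parsed fine -> valid
    except (TypeError, ValueError) as err:
        return None, {'raw': data, 'error': str(err)}  # bad row -> invalid

rows = ['1', '2', 'oops', '4']
pairs = [process_row(r) for r in rows]
valid_records = [v for v, _ in pairs if v is not None]
invalid_records = [i for _, i in pairs if i is not None]
print(valid_records)    # [{'value': 1}, {'value': 2}, {'value': 4}]
print(invalid_records)  # one entry, for 'oops'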
from pathos.threading import ThreadPool
import time

pool = ThreadPool(nodes=4)

# do a blocking map on the chosen function
print(pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

# do a non-blocking map, then extract the results from the iterator
results = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
print("...")
print(list(results))

# do an asynchronous map, then get the results
results = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
while not results.ready():
    time.sleep(5)
    print(".")
print(results.get())

# do one item at a time, using a pipe
print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

# do one item at a time, using an asynchronous pipe
result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
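# The two apipe handles above are returned immediately and never read; a short
# follow-up sketch, assuming the same `pool` object, blocks on each handle and
# then releases the worker threads.
print(result1.get())   # 1**5 -> 1
print(result2.get())   # 2**6 -> 64

pool.close()
pool.join()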
def make_patches(data_root, patches_root, patch_size, outline_filled=None,
                 remove_filled=False, min_widths=('def',), mirror=True,
                 rotations=(0,), translations=((0, 0),),
                 distinguishability_threshold=.5, num_workers=0,
                 random_samples=None, leave_width_percentile=None):
    if num_workers != 0:
        from pathos.multiprocessing import cpu_count, ProcessingPool
        from pathos.threading import ThreadPool
        if num_workers == -1:
            optimal_workers = cpu_count() - 1
            workers_pool = ProcessingPool(optimal_workers)
        else:
            workers_pool = ProcessingPool(num_workers)
        print(f'Workers pool: {workers_pool}')
        savers_pool = ThreadPool(1)
        # dummy job so the first "wait for previous batch" call has something to join
        saving_patches_in_bg = savers_pool.amap(lambda a: None, [])
    else:
        workers_pool = 0

    path = lambda basename, origin, width='def', ori='def', rot=0, t=(0, 0): os.path.join(
        patches_root, basename, '{}x{}'.format(*patch_size),
        'width_{}'.format(width), 'orientation_{}'.format(ori),
        'rotated_deg_{}'.format(rot), 'translated_{}_{}'.format(*t),
        '{}_{}.svg'.format(*origin))

    orientations = ['def']
    if mirror:
        orientations.append('mir')

    if random_samples is not None:
        min_widths_all = deepcopy(min_widths)
        orientations_all = deepcopy(orientations)
        rotations_all = deepcopy(rotations)
        translations_all = deepcopy(translations)

    source_images = glob(os.path.join(data_root, '**', '*.svg'), recursive=True)
    for file in source_images:
        print('Processing file {}'.format(file))
        basename = file[len(data_root) + 1:-4]  # strip the data_root prefix and the extension

        vector_image = VectorImage.from_svg(file)
        if remove_filled:
            vector_image.remove_filled()
        if outline_filled is not None:
            vector_image.leave_only_contours(outline_filled)
        if leave_width_percentile is not None:
            vector_image.leave_width_percentile(leave_width_percentile)

        if random_samples is not None:
            min_widths = np.random.choice(
                min_widths_all, size=min(random_samples, len(min_widths_all)),
                replace=False)
            orientations = np.random.choice(
                orientations_all, size=min(random_samples, len(orientations_all)),
                replace=False)
            rotations = np.random.choice(
                rotations_all, size=min(random_samples, len(rotations_all)),
                replace=False)
            translations = translations_all[
                np.random.choice(len(translations_all),
                                 size=min(random_samples, len(translations_all)),
                                 replace=False)]

        for width in min_widths:
            print('\twidth {}'.format(width))
            if width == 'def':
                vector_image_scaled = vector_image
            else:
                vector_image_scaled = vector_image.copy()
                vector_image_scaled.scale_to_width('min', width)
            for orientation in orientations:
                print('\t\torientation {}'.format(orientation))
                if orientation == 'def':
                    vector_image_reoriented = vector_image_scaled
                else:
                    vector_image_reoriented = vector_image_scaled.mirrored()
                for rotation in rotations:
                    print('\t\t\trotation {}'.format(rotation))
                    vector_image_rotated = vector_image_reoriented.rotated(
                        rotation, adjust_view=True)
                    for translation in translations:
                        print('\t\t\t\ttranslation {}'.format(translation))
                        vector_image_translated = vector_image_rotated.translated(
                            translation, adjust_view=True)

                        vector_patches = vector_image_translated.split_to_patches(
                            patch_size, workers=workers_pool)

                        if num_workers != 0:
                            print('\t\t\t\t\twaiting for previous batch to be saved')
                            saving_patches_in_bg.get()

                        def simplify_and_save(vector_patch, basename=basename,
                                              width=width, orientation=orientation,
                                              rotation=rotation, translation=translation):
                            vector_patch.simplify_segments(
                                distinguishability_threshold=distinguishability_threshold)
                            if len(vector_patch.paths) == 0:
                                return
                            save_path = path(basename,
                                             (int(vector_patch.x.as_pixels()),
                                              int(vector_patch.y.as_pixels())),
                                             width, orientation, rotation, translation)
                            os.makedirs(os.path.dirname(save_path), exist_ok=True)
                            vector_patch.save(save_path)

                        if num_workers == 0:
                            print('\t\t\t\t\tsaving patches')
                            for vector_patch in vector_patches.reshape(-1):
                                simplify_and_save(vector_patch)
                        else:
                            print('\t\t\t\t\tsaving patches')
                            saving_patches_in_bg = savers_pool.amap(
                                simplify_and_save, vector_patches.reshape(-1))

    if num_workers != 0:
        workers_pool.close()
        workers_pool.join()
        workers_pool.clear()
        savers_pool.close()
        savers_pool.join()
        savers_pool.clear()
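# A hypothetical invocation of make_patches; every path and parameter value
# below is an illustrative assumption, not a value from the source.
if __name__ == '__main__':
    make_patches(
        data_root='data',        # directory scanned recursively for *.svg
        patches_root='patches',  # output root for the patch tree
        patch_size=(64, 64),     # 64x64-pixel patches
        mirror=True,             # also emit mirrored orientations
        rotations=(0, 90),       # two rotation angles, in degrees
        num_workers=3,           # 3 worker processes; 0 = run single-process
    )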