Example #1
    def run(self, *args):
        """ kickoff the program """
        self.add_args()

        if len(args) > 0:
            program_args = self.parser.parse_args(args)
        else:
            program_args = self.parser.parse_args()

        # set up the MongoDB connection
        mongo_connection = GritsMongoConnection(program_args)

        # Confirm the user wants to apply the indexes
        confirm = True
        if not program_args.force:
            confirm = self.query_yes_no(
                "This will lock the database. Are you sure?", "no")
        if confirm:
            # ensure that the indexes are applied to the collections
            pool = ThreadPool(nodes=1)
            results = pool.amap(mongo_connection.ensure_indexes, [None])

            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)

            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            logging.info(result)
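For reference, the asynchronous-map pattern above can be reduced to a minimal, self-contained sketch. The slow_task function below is a hypothetical placeholder standing in for mongo_connection.ensure_indexes; the rest follows the pathos ThreadPool API exactly as used in the example.

# Minimal sketch of the amap / ready / get polling pattern used above.
# `slow_task` is a hypothetical placeholder for a long-running call such
# as ensure_indexes.
import sys
import time

from pathos.threading import ThreadPool


def slow_task(_):
    time.sleep(2)                         # simulate a long-running operation
    return 'done'


pool = ThreadPool(nodes=1)
results = pool.amap(slow_task, [None])    # non-blocking; returns an async result

while not results.ready():                # poll while showing a spinner
    for cursor in '|/-\\':
        sys.stdout.write('\b%s' % cursor)
        sys.stdout.flush()
        time.sleep(.25)

sys.stdout.write('\b')
sys.stdout.flush()
print(results.get())                      # -> ['done']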
Example #2
def GMM_Ineq_parall(Theta0, DATA_STRUCT, d_struct):
    Theta = {
        "comm_mu": Theta0[0],
        "priv_mu": Theta0[1],
        "epsilon_mu": Theta0[2],
        "comm_var": Theta0[3],
        "priv_var": Theta0[4],
        "epsilon_var": Theta0[5],
    }

    rng = np.random.RandomState(d_struct['rng_seed'])

    start = time.time()

    print('--------------------------------------------------------')
    print('current parameter set:')
    print(Theta)
    '''
    parallel programming with two levels:
        splitting the data
        running the estimation
    '''
    data_n = len(DATA_STRUCT)

    work_pool = ThreadPool(nodes=data_n)

    cpu_num = multiprocessing.cpu_count()

    cpu_num_node = int((cpu_num - 1) / data_n)
    # use amap so that all data parts run asynchronously at once
    results = work_pool.amap(
        partial(para_data_allo_1, Theta, cpu_num_node, rng, d_struct),
        iter(DATA_STRUCT))
    work_pool.close()
    while not results.ready():
        time.sleep(5)
        print(".")


#    work_pool.join()

    auction_result = np.nanmean(list(results.get()))

    end = time.time()

    print("object value : " + str(auction_result))
    print("time spend in this loop: ")
    print(end - start)
    print('--------------------------------------------------------\n')

    ## save the parameters and objective value

    with open('para.txt', 'a+') as f:
        for item in Theta0:
            f.write("%f\t" % item)

        f.write("%f\t" % auction_result)
        f.write("%f\n" % (end - start) / 60)

    return auction_result
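The outer level of the two-level scheme above, one ThreadPool node per data set, each calling a partially-applied worker, can be sketched in isolation. score_one_dataset below is a hypothetical stand-in for para_data_allo_1, and the toy arrays replace DATA_STRUCT.

# Sketch of the outer parallel level: one thread per data set, each thread
# calling a partially-applied worker via functools.partial.
# `score_one_dataset` is a hypothetical stand-in for para_data_allo_1.
import time
from functools import partial

import numpy as np
from pathos.threading import ThreadPool


def score_one_dataset(theta, dataset):
    # placeholder for the per-dataset estimation step
    return float(np.mean(dataset)) * theta['comm_mu']


theta = {'comm_mu': 1.0}
datasets = [np.ones(10), 2 * np.ones(10), 3 * np.ones(10)]

pool = ThreadPool(nodes=len(datasets))
results = pool.amap(partial(score_one_dataset, theta), iter(datasets))
pool.close()

while not results.ready():
    time.sleep(1)

objective = np.nanmean(list(results.get()))
print(objective)                          # -> 2.0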
Example #3
    def run(self, *args):
        """ kickoff the program """
        self.add_args()
        
        if len(args) > 0:
            program_args = self.parser.parse_args(args)
        else:
            program_args = self.parser.parse_args()
        
        # set up the MongoDB connection
        mongo_connection = GritsMongoConnection(program_args)
        
        # Confirm the user wants to apply the indexes
        confirm = True
        if not program_args.force:
            confirm = self.query_yes_no("This will lock the database.  Are your sure?", "no")
        if confirm:
            # ensure that the indexes are applied to the collections
            pool = ThreadPool(nodes=1)
            results = pool.amap(mongo_connection.ensure_indexes, [None])
            
            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)

            sys.stdout.write('\b')
            sys.stdout.flush()
            # async-poll is done, get the results
            result = results.get()
            logging.info(result)
Example #4
    def process(self, mongo_connection):
        """ process a chunk of rows in the file """
        reader = UnicodeReader(self.program_arguments.infile,
                               dialect=self.provider_type.dialect)
        self.find_header(reader)

        for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
            # collections of valid and invalid records for batch upsert / insert_many
            valid_records = []
            invalid_records = []
            # Is threading enabled? This may improve performance when MongoDB is
            # not running on localhost, due to the busy wait on the airport lookup
            # in the FlightGlobalType case.
            if settings._THREADING_ENABLED:
                pool = ThreadPool(nodes=settings._NODES)
                results = pool.amap(self.process_row, chunk)

                while not results.ready():
                    # command-line spinner
                    for cursor in '|/-\\':
                        sys.stdout.write('\b%s' % cursor)
                        sys.stdout.flush()
                        time.sleep(.25)

                sys.stdout.write('\b')
                sys.stdout.flush()
                # async-poll is done, get the results
                result = results.get()
                valid_records = [x[0] for x in result if x[0] is not None]
                invalid_records = [x[1] for x in result if x[1] is not None]

            else:
                # single-threaded synchronous processing
                for data in chunk:
                    valid, invalid = self.process_row(data)
                    if valid is not None:
                        valid_records.append(valid)
                    if invalid is not None:
                        invalid_records.append(invalid)

            # bulk upsert / insert_many of the records
            valid_result = mongo_connection.bulk_upsert(
                self.provider_type.collection_name, valid_records)
            invalid_result = mongo_connection.insert_many(
                settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
            logging.debug('valid_result: %r', valid_result)
            logging.debug('invalid_result: %r', invalid_result)
Example #5
    def process(self, mongo_connection):
        """ process a chunk of rows in the file """
        reader = UnicodeReader(self.program_arguments.infile, dialect=self.provider_type.dialect)
        self.find_header(reader)

        for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
            # collections of valid and invalid records for batch upsert / insert_many
            valid_records = []
            invalid_records = []
            # Is threading enabled? This may improve performance when MongoDB is
            # not running on localhost, due to the busy wait on the airport lookup
            # in the FlightGlobalType case.
            if settings._THREADING_ENABLED:
                pool = ThreadPool(nodes=settings._NODES)
                results = pool.amap(self.process_row, chunk)

                while not results.ready():
                    # command-line spinner
                    for cursor in '|/-\\':
                        sys.stdout.write('\b%s' % cursor)
                        sys.stdout.flush()
                        time.sleep(.25)

                sys.stdout.write('\b')
                sys.stdout.flush()
                # async-poll is done, get the results
                result = results.get()
                valid_records = [ x[0] for x in result if x[0] is not None ]
                invalid_records = [ x[1] for x in result if x[1] is not None ]

            else:
                # single-threaded synchronous processing
                for data in chunk:
                    valid, invalid = self.process_row(data)
                    if valid is not None:
                        valid_records.append(valid)
                    if invalid is not None:
                        invalid_records.append(invalid)

            # bulk upsert / insert_many of the records
            valid_result = mongo_connection.bulk_upsert(self.provider_type.collection_name, valid_records)
            invalid_result = mongo_connection.insert_many(settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
            logging.debug('valid_result: %r', valid_result)
            logging.debug('invalid_result: %r', invalid_result)
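The chunked pattern in process(), mapping a row handler over a chunk and then splitting the (valid, invalid) result tuples into two lists, can be sketched without the Mongo and file-reader machinery. process_row below is a hypothetical stand-in for the real row validator.

# Sketch of the chunk pattern used in process(): amap a row handler over a
# chunk, then split the (valid, invalid) result tuples into two lists.
import time

from pathos.threading import ThreadPool


def process_row(row):
    # return (valid_record, invalid_record); exactly one of them is None
    if row % 2 == 0:
        return ({'row': row}, None)
    return (None, {'row': row, 'error': 'odd row'})


chunk = range(10)
pool = ThreadPool(nodes=4)
results = pool.amap(process_row, chunk)

while not results.ready():
    time.sleep(.25)

result = results.get()
valid_records = [x[0] for x in result if x[0] is not None]
invalid_records = [x[1] for x in result if x[1] is not None]
print(len(valid_records), len(invalid_records))   # -> 5 5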
Example #6
from pathos.threading import ThreadPool
import time
pool = ThreadPool(nodes=4)

# do a blocking map on the chosen function
print(pool.map(pow, [1,2,3,4], [5,6,7,8]))

# do a non-blocking map, then extract the results from the iterator
results = pool.imap(pow, [1,2,3,4], [5,6,7,8])
print("...")
print(list(results))


# do an asynchronous map, then get the results
results = pool.amap(pow, [1,2,3,4], [5,6,7,8])
while not results.ready():
    time.sleep(5)
    print(".")

print(results.get())


# do one item at a time, using a pipe

print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

# do one item at a time, using an asynchronous pipe

result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
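The snippet stops before collecting the asynchronous pipe results; they can be retrieved with get(), just like the amap results:

# retrieve the asynchronous pipe results
print(result1.get())   # 1**5 -> 1
print(result2.get())   # 2**6 -> 64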
Example #7
def make_patches(data_root, patches_root, patch_size, outline_filled=None, remove_filled=False, min_widths=('def',),
                 mirror=True, rotations=(0,), translations=((0, 0),), distinguishability_threshold=.5, num_workers=0,
                 random_samples=None, leave_width_percentile=None):
    if num_workers != 0:
        from pathos.multiprocessing import cpu_count, ProcessingPool
        from pathos.threading import ThreadPool
        if num_workers == -1:
            optimal_workers = cpu_count() - 1
            workers_pool = ProcessingPool(optimal_workers)
        else:
            workers_pool = ProcessingPool(num_workers)
        print(f'Workers pool: {workers_pool}')

        savers_pool = ThreadPool(1)
        saving_patches_in_bg = savers_pool.amap(lambda a: None, [])
    else:
        workers_pool = 0

    path = lambda basename, origin, width='def', ori='def', rot=0, t=(0, 0): os.path.join(patches_root, basename,
                                                                                          '{}x{}'.format(*patch_size),
                                                                                          'width_{}'.format(width),
                                                                                          'orientation_{}'.format(ori),
                                                                                          'rotated_deg_{}'.format(rot),
                                                                                          'translated_{}_{}'.format(*t),
                                                                                          '{}_{}.svg'.format(*origin))

    orientations = ['def']
    if mirror:
        orientations.append('mir')

    if random_samples is not None:
        min_widths_all = deepcopy(min_widths)
        orientations_all = deepcopy(orientations)
        rotations_all = deepcopy(rotations)
        translations_all = deepcopy(translations)

    source_images = glob(os.path.join(data_root, '**', '*.svg'), recursive=True)
    for file in source_images:
        print('Processing file {}'.format(file))
        basename = file[len(data_root) + 1:-4]  # strip data_root prefix and the '.svg' extension

        vector_image = VectorImage.from_svg(file)
        if remove_filled:
            vector_image.remove_filled()
        if outline_filled is not None:
            vector_image.leave_only_contours(outline_filled)
        if leave_width_percentile is not None:
            vector_image.leave_width_percentile(leave_width_percentile)

        if random_samples is not None:
            min_widths = np.random.choice(min_widths_all, size=min(random_samples, len(min_widths_all)), replace=False)
            orientations = np.random.choice(orientations_all, size=min(random_samples, len(orientations_all)),
                                            replace=False)
            rotations = np.random.choice(rotations_all, size=min(random_samples, len(rotations_all)), replace=False)
            translations = translations_all[
                np.random.choice(len(translations_all), size=min(random_samples, len(translations_all)), replace=False)]

        for width in min_widths:
            print('\twidth {}'.format(width))
            if width == 'def':
                vector_image_scaled = vector_image
            else:
                vector_image_scaled = vector_image.copy()
                vector_image_scaled.scale_to_width('min', width)
            for orientation in orientations:
                print('\t\torientation {}'.format(orientation))
                if orientation == 'def':
                    vector_image_reoriented = vector_image_scaled
                else:
                    vector_image_reoriented = vector_image_scaled.mirrored()
                for rotation in rotations:
                    print('\t\t\trotation {}'.format(rotation))
                    vector_image_rotated = vector_image_reoriented.rotated(rotation, adjust_view=True)
                    for translation in translations:
                        print('\t\t\t\ttranslation {}'.format(translation))
                        vector_image_translated = vector_image_rotated.translated(translation, adjust_view=True)

                        vector_patches = vector_image_translated.split_to_patches(patch_size, workers=workers_pool)
                        if num_workers != 0:
                            print('\t\t\t\t\twaiting for previous batch to be saved')
                            saving_patches_in_bg.get()

                        def simplify_and_save(vector_patch, basename=basename, width=width, orientation=orientation,
                                              rotation=rotation, translation=translation):
                            vector_patch.simplify_segments(distinguishability_threshold=distinguishability_threshold)
                            if len(vector_patch.paths) == 0:
                                return
                            save_path = path(basename,
                                             (int(vector_patch.x.as_pixels()), int(vector_patch.y.as_pixels())), width,
                                             orientation, rotation, translation)
                            os.makedirs(os.path.dirname(save_path), exist_ok=True)
                            vector_patch.save(save_path)

                        if num_workers == 0:
                            print('\t\t\t\t\tsaving patches')
                            for vector_path in vector_patches.reshape(-1):
                                simplify_and_save(vector_path)
                        else:
                            print('\t\t\t\t\tsaving patches')
                            saving_patches_in_bg = savers_pool.amap(simplify_and_save, vector_patches.reshape(-1))

    if num_workers != 0:
        workers_pool.close()
        workers_pool.join()
        workers_pool.clear()

        savers_pool.close()
        savers_pool.join()
        savers_pool.clear()
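The single-node savers_pool above overlaps computation with I/O: while the next batch of patches is being produced in the foreground, the previous batch is written out on a background thread. A minimal sketch of that pattern, with hypothetical compute_batch and save_item helpers:

# Minimal sketch of the background-saver pattern: a one-thread pool writes
# the previous batch while the main loop prepares the next one.
# `compute_batch` and `save_item` are hypothetical placeholders.
import time

from pathos.threading import ThreadPool


def compute_batch(i):
    time.sleep(0.5)                 # stand-in for extracting patches
    return [(i, j) for j in range(4)]


def save_item(item):
    time.sleep(0.1)                 # stand-in for writing an SVG to disk
    print('saved', item)


savers_pool = ThreadPool(1)
saving_in_bg = savers_pool.amap(lambda a: None, [])   # no-op so the first wait returns immediately

for i in range(3):
    batch = compute_batch(i)        # produce the next batch in the foreground
    saving_in_bg.get()              # wait for the previous batch to finish saving
    saving_in_bg = savers_pool.amap(save_item, batch)  # save this batch in the background

saving_in_bg.get()                  # flush the last batch
savers_pool.close()
savers_pool.join()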