def align_dataset(config_file: str):
    """Aligns an image dataset
    """
    config = AlignConfig(config_file)
    output_dir = os.path.expanduser(config.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    dataset = get_dataset(config.input_dir)

    num_images = sum(len(i) for i in dataset)
    TIMER.max_value = num_images
    TIMER.start()

    num_processes = cast(int, min(config.num_processes, os.cpu_count()))
    if num_processes == -1:
        num_processes = os.cpu_count()
    if num_processes > 1:
        process_pool = ProcessPool(num_processes)
        process_pool.imap(align_person, zip(dataset, [config] * len(dataset)))
        process_pool.close()
        process_pool.join()
    else:
        for person in dataset:
            align_person((person, config))

    transform_to_lfw_format(output_dir, num_processes)

    TIMER.finish()
    print('Total number of images: %d' % int(NUM_IMAGES_TOTAL.value))
    print('Number of faces found and aligned: %d' % int(NUM_SUCESSFUL.value))
    print('Number of images that could not be aligned: %d' % int(NUM_UNSECESSFUL.value))
def pare_multi_process(urls_and_people):
    print("[INFO] Paring and downlaoding all image urls with {} processes".
          format(NUM_PROCESSES))
    urls, person = zip(*urls_and_people)
    pare_pool = ProcessPool(NUM_PROCESSES)
    pare_pool.imap(safe_pare_matches_and_download, urls, person)
    pare_pool.close()
    pare_pool.join()
    print("[INFO] Done paring and downlaoding all image urls")
Example #3
def make_cache():
    from grid2viz.src.manager import (
        scenarios,
        agents,
        make_episode_without_decorate,
        n_cores,
        retrieve_episode_from_disk,
        save_in_ram_cache,
        cache_dir,
    )

    from pathos.multiprocessing import ProcessPool

    if not os.path.exists(cache_dir):
        print(
            "Starting Multiprocessing for reading the best agent of each scenario"
        )

    # TODO: not all agents have necessarily been run on exactly the same scenarios
    # Avoid an error if an agent was not run on a given scenario
    agent_scenario_list = [(agent, scenario) for agent in agents
                           for scenario in scenarios]

    agents_data = []
    if n_cores == 1:  # no multiprocessing; useful for debugging if needed
        for agent_scenario in agent_scenario_list:
            agents_data.append(
                make_episode_without_decorate(agent_scenario[0],
                                              agent_scenario[1]))
    else:
        pool = ProcessPool(n_cores)
        agents_data = list(
            pool.imap(
                make_episode_without_decorate,
                [agent_scenario[0]
                 for agent_scenario in agent_scenario_list],  # agents
                [agent_scenario[1] for agent_scenario in agent_scenario_list],
            )
        )  # scenarios: one call per (agent, scenario) pair
        pool.close()
        print("Multiprocessing done")

    #####
    # saving data on disk
    for i, agent_scenario in enumerate(agent_scenario_list):
        print(i)
        agent = agent_scenario[0]
        episode_name = agent_scenario[1]
        agent_episode = agents_data[i]
        if agent_episode is not None:
            episode_data = retrieve_episode_from_disk(
                agent_episode.episode_name, agent_episode.agent)

            agent_episode.decorate(episode_data)
            save_in_ram_cache(agent_episode.episode_name, agent_episode.agent,
                              agent_episode)
Example #4
    def normalize(self) -> None:
        """ Normalize ensemble """
        if not self.regenerate:
            try:
                self.load()
                return
            except FileNotFoundError:
                pass

        assert xor((self.normalizer_nld is not None
                    and self.normalizer_gsf is not None),
                   self.normalizer_simultan is not None), \
            "Either 'normalizer_nld' and 'normalizer_gsf' must be set, or " \
            "normalizer_simultan"

        gsfs = self.extractor.gsf
        nlds = self.extractor.nld

        self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
        pool = ProcessPool(nodes=self.nprocesses)
        N = len(nlds)
        iterator = pool.imap(self.step, range(N), nlds, gsfs)
        self.res = list(tqdm(iterator, total=N))
        pool.close()
        pool.join()
        pool.clear()

        self.save()
Example #5
def getdates(year, par=False):
    ''' Use gsutil to read files for a specific year'''

    __bucket__ = 'earthenginepartners-hansen'
    __location__ = 'gs://%s/GLADalert/%d' % (__bucket__, year)

    dates = os.popen('gsutil ls %s' % __location__).read().split()

    print('number of dates: ', len(dates))

    ret = []

    if par:
        from pathos.multiprocessing import ProcessPool
        pool = ProcessPool(nodes=25)

        def pl(i):
            return os.popen('gsutil ls %s' % i).read().split()

        dates = pool.imap(pl, dates)

    else:
        # list the files for each date prefix sequentially
        dates = [os.popen('gsutil ls %s' % i).read().split() for i in dates]

    for i in dates:
        ret.extend(i)

    return ret
Example #6
def run_process_by_pathos_nonblockingmap(pos_arr, dc_arr):
    this_function_name = inspect.currentframe().f_code.co_name
    print("Begin {}...".format (this_function_name))
    pool = ProcessPool(nodes=8)
    # do a non-blocking map, then extract the results from the iterator
    results = pool.imap(run_once, pos_arr, dc_arr)
    # drain the iterator so all tasks actually run, and return the results
    return list(results)
Example #7
    def start(self):
        dcm_list = dicom_list(self.input_directory, load=False)
        pool = ProcessPool(self.concurrency)

        # Eager-load all filenames mainly so we have the total count
        dcm_files = [dcm for dcm in dcm_list if dcm]
        iterator = pool.imap(self.sort, dcm_files)

        # If we aren't going to be showing output for each file, then show the progress bar
        if not self.verbose and not self.dry_run:
            iterator = tqdm.tqdm(iterator, total=len(dcm_files))

        for _ in iterator:
            pass
Example #8
def multiprocessing_imap(
    func: Callable,
    iterable: Iterable[Any],
    *,
    n_workers: int = -1,
    progressbar: bool = True,
    args=(),
    **kwargs
) -> List[Any]:
    """Execute func on each element in iterable on n_workers, ensuring order.

    Args:
        func: Function to apply to each element in iterable.
        iterable: Input iterable on which to execute func.
        n_workers: Amount of workers (processes) to spawn.
        progressbar: Whether to wrap the chunks in a tqdm.auto.tqdm.
        args: Additional positional arguments to pass to func.
        kwargs: Additional keyword arguments to pass to func.

    Returns:
        Results in same order as input iterable.
    """
    iterable = list(iterable)  # exhaust if iterable is a generator
    n_chunks = len(iterable)
    func = partial(func, *args, **kwargs)

    if n_chunks == 1 or n_workers == 1:
        # no sense spawning pool
        pool = None
        stage = map(func, iterable)
    else:
        n_workers = _choose_n_workers(n_chunks, n_workers)

        logger.debug("Starting ProcessPool with %d workers", n_workers)
        pool = ProcessPool(n_workers)

        stage = pool.imap(func, iterable)

    if progressbar:
        stage = tqdm(stage, total=n_chunks)

    try:
        return list(stage)
    finally:
        if pool:
            logger.debug("Closing ProcessPool")
            pool.clear()
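
A minimal usage sketch for the helper above (the function `square`, its `offset` keyword, and the worker count are illustrative assumptions, not part of the original module):

def square(x, offset=0):
    # top-level function so it can be pickled and shipped to worker processes
    return x * x + offset

# results come back in the same order as the input iterable
results = multiprocessing_imap(square, range(10), n_workers=2,
                               progressbar=False, offset=1)
print(results)  # [1, 2, 5, 10, 17, ...]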
Example #9
def main(args):
    if len(args.input) < 2:
        print("Please name at least one STAR file and an output directory")
        return 1

    if args.apix is None:
        print("Using pixel size computed from STAR files")

    def do_job(star):
        try:
            mrc = os.path.join(args.output,
                               os.path.basename(star).replace(".star", ".mrc"))
            print("Starting reconstruction of %s" % star)
            do_reconstruct(star, mrc, args.apix, args.sym, args.ctf)
            print("Wrote %s reconstruction to %s" % (star, mrc))
            if args.mask is not None:
                masked_mrc = mrc.replace(".mrc", "_masked.mrc")
                do_mask(mrc, masked_mrc, args.mask)
                print("Wrote masked map %s" % masked_mrc)
            if args.mask is not None and args.delete_unmasked:
                delete_unmasked(mrc, masked_mrc)
                print("Overwrote %s with %s" % (mrc, masked_mrc))
        except Exception as e:
            print("Failed on %s: %s" % (star, e))
        return 0

    pool = Pool(nodes=args.nproc)

    #pool.apipe(do_job, args.input)
    results = pool.imap(do_job, args.input)
    codes = list(results)

    if pool is not None:
        pool.close()
        pool.join()
        pool.terminate()

    return 0
Example #10
def compute_seq_distances(sequences, affinity=sequence_distance,
                          nb_jobs=NB_THREADS):
    """ compute matrix of all distances

    :param [] sequences: list of all sequences
    :param func affinity: function specify the sample affinity
    :param int nb_jobs: number jobs running in parallel
    :return ndarray:

    >>> ss = [['a', 'b', 'a', 'c'], ['a', 'a', 'b', 'a'], ['b', None, 'b', 'a']]
    >>> compute_seq_distances(ss, affinity=sequence_distance)
    array([[0.  , 0.25, 0.5 ],
           [0.25, 0.  , 0.25],
           [0.5 , 0.25, 0.  ]])
    >>> ss = [['hi', 'there', 'how', 'are', 'you'],
    ...       ['hi', 'how', 'are', 'you'],
    ...       ['hi', 'are', 'you', 'there']]
    >>> compute_seq_distances(ss)
    array([[0. , 0.2, 0.6],
           [0.2, 0. , 0.5],
           [0.6, 0.5, 0. ]])
    """
    idxs = [(i, j) for i in range(len(sequences))
            for j in range(i, len(sequences))]
    idx_lt = (((i, j), (sequences[i], sequences[j])) for i, j in idxs)
    dists = np.zeros((len(sequences), len(sequences)))

    _wrap_dist = partial(wrap_distance, similar_distance=affinity)
    pool = ProcessPool(nb_jobs)

    for idx, d in pool.imap(_wrap_dist, idx_lt):
        dists[idx[0], idx[1]] = d
        dists[idx[1], idx[0]] = d

    pool.close()
    pool.join()
    pool.clear()
    return dists
Example #11
def multiprocess(func: Callable,
                 all_urls: List[List[str]],
                 people: List[str],
                 total_count: int,
                 info: str) -> Tuple[List[List[str]], List[str]]:
    print("[INFO] {} with {} processes".format(info, NUM_PROCESSES))
    global TIMER
    widgets_match = ['{}: '.format(info), pb.Percentage(), ' ',
                     pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
    TIMER = pb.ProgressBar(widgets=widgets_match, maxval=total_count).start()

    if NUM_PROCESSES > 1:
        process_pool = ProcessPool(NUM_PROCESSES)
        urls_and_people = process_pool.imap(func, all_urls, people)
        process_pool.close()
        process_pool.join()
        filtered_urls, people = zip(*urls_and_people)
    else:
        filtered_urls = []
        filtered_people = []
        for urls, person in zip(all_urls, people):
            filtered, person = func(urls, person)
            filtered_urls.append(filtered)
            filtered_people.append(person)
        people = filtered_people
    print("[INFO] Done {}".format(info))
    return filtered_urls, people
Example #12
'''
Variation pathos, local method

Windows OS: Hangs/ multiprocess error with newer version of pathos
Mac OS:
Linux:
Debian (unclear because windows app but operated in same manner)

Cloud-based:
Repl.it: Works
Ideone.com: Fails-multiprocess error
'''

from multiprocess import freeze_support
from pathos.multiprocessing import ProcessPool


def f(vars):
    return vars[0]**vars[1]


if __name__ == "__main__":
    freeze_support()

    pool = ProcessPool(4)

    print(list(pool.imap(f, [(1, 5), (2, 8), (3, 9)])))
Example #13
    def _cmpt_lim_phsrand_obj_vals(self, phs_red_rate, idxs_sclr):

        beg_tm = default_timer()

        _ = phs_red_rate
        _ = idxs_sclr

        self._sett_lim_phsrand_dir.mkdir(exist_ok=True)

        ptrb_ratios = np.linspace(self._sett_lim_phsrand_ptrb_lbd,
                                  self._sett_lim_phsrand_ptrb_ubd,
                                  self._sett_lim_phsrand_n_ptrb_vals,
                                  endpoint=True)

        ptrb_obj_vals = np.empty((self._sett_lim_phsrand_n_ptrb_vals,
                                  self._sett_lim_phsrand_iters_per_atpt))

        n_cpus = min(self._sett_lim_phsrand_n_ptrb_vals,
                     self._sett_misc_n_cpus)

        ubd_sclr = 1.2
        search_attempts = 0
        ress = []
        sel_stat_ftn = getattr(np, self._alg_lim_phsrand_sel_stat)

        if self._vb:
            print('Attempt,', 'Perturb ratio,', '   Minimum,', '      Mean,',
                  '   Maximum')

        if n_cpus > 1:
            self._lock = Manager().Lock()

            mp_pool = ProcessPool(n_cpus)
            mp_pool.restart(True)

            for i in range(0, self._sett_lim_phsrand_n_ptrb_vals, n_cpus):

                end_idx = min(self._sett_lim_phsrand_n_ptrb_vals, n_cpus + i)

                assert i < end_idx, 'This was not supposed to happen!'

                search_attempts += end_idx - i

                # Don't use ret_mp_idxs, it will be inefficient.
                args_gen = ((j, ptrb_ratios[j]) for j in range(i, end_idx))

                ptrb_obj_vals_iter = (list(
                    mp_pool.imap(self._cmpt_lim_phsrand_obj_vals_single,
                                 args_gen)))

                ress.extend(ptrb_obj_vals_iter)

                if np.any([
                        sel_stat_ftn(ptrb_obj_vals_iter[k][1]) >=
                    (self._sett_lim_phsrand_obj_ubd * ubd_sclr)
                        for k in range(len(ptrb_obj_vals_iter))
                ]):

                    break

            mp_pool.close()
            mp_pool.join()

            self._lock = None

            mp_pool = None

        else:
            self._lock = Lock()

            for j in range(self._sett_lim_phsrand_n_ptrb_vals):
                search_attempts += 1

                ress.append(
                    self._cmpt_lim_phsrand_obj_vals_single(
                        (j, ptrb_ratios[j])))

                if (sel_stat_ftn(ress[-1][1]) >=
                    (self._sett_lim_phsrand_obj_ubd * ubd_sclr)):

                    break

            self._lock = None

        take_idxs = []
        for res in ress:
            take_idxs.append(res[0])
            ptrb_obj_vals[take_idxs[-1], :] = res[1]

        take_idxs.sort()
        take_idxs = np.array(take_idxs)

        ptrb_ratios = ptrb_ratios[take_idxs]
        ptrb_obj_vals = ptrb_obj_vals[take_idxs]

        res = ress = None

        assert np.all(
            np.isfinite(ptrb_ratios)), ('Invalid values in ptrb_ratios!')

        assert np.all(
            ptrb_ratios >= 0), ('Values less than zero in ptrb_ratios!')

        assert np.all(
            np.isfinite(ptrb_obj_vals)), ('Invalid values in ptrb_obj_vals!')

        assert np.all(
            ptrb_obj_vals >= 0), ('Values less than zero in ptrb_obj_vals!')

        self._alg_lim_phsrand_ptrb_ratios = ptrb_ratios
        self._alg_lim_phsrand_ptrb_obj_vals = ptrb_obj_vals

        self._set_lim_phsrand_ptrb_ratio()

        self._plot_lim_phsrand_obj_vals()

        end_tm = default_timer()

        if self._vb:
            print(f'Found perturbation ratio of '
                  f'{self._alg_lim_phsrand_ptrb_ratio:5.3E} in '
                  f'{end_tm - beg_tm:0.1f} '
                  f'seconds using {search_attempts} attempts.')

        return
Example #14
    def run(self,
            params,
            runfunc=None,
            parallel=True,
            n_thread=8,
            hide_progress=False):
        """ Main logical core of Simulator, accepting parameter list/array and returning simulation results.

        run() is designed to be a flexible method for running batches of simulations. run() takes the elements of its
        only required argument, params, and dispatches them to a function (either default or user-provided) that takes
        that element and returns a neural simulation result (e.g., instantaneous firing rate response). This dispatching
        can either be done in a traditional for loop or using a multiprocessing Pool. The latter means that batch
        simulations can be easily parallelized.

        The elements of params are generally assumed to be dicts that encode the information required to run
        simulations, as Simulator subclasses are expected to implement a simulate() method that accepts such a dict and
        returns a simulation result. This is because the default behavior of run() is to apply this simulate() method
        to each element of params. However, by passing a custom "runfunc" one can override this default behavior. In
        theory, one could pass *any* function as a runfunc and thereby use any subclass of Simulator to implement any
        simulation. However, this is not the intended use of this functionality. Instead, the user is expected to wrap
        the simulate() method with functions that extend or alter its functionality. This pattern creates a natural
        correspondence between any Simulator object and its simulate() method. That is, even when that method is
        extended with a custom runfunc, a user generally knows what simulation is being executed at the lowest level
        of the runfunc simply by knowing the Simulator in question. Many examples of this pattern are visible in the
        acceptance and replication tests in the test suite.

        run() returns an object of the same type as its input params (either a list or an array). This output is also
        of the same shape and size as the input params, and each element corresponds to the matching element of the
        input params.

        One disadvantage of the way that run() is currently implemented is that parallelization is only supported
        between elements of params. In other words, only a single thread can work on a single element of params.

        Args:
            params (list, ndarray): a list or ndarray whose elements are passed to runfunc
            runfunc (func): function that accepts kwargs and returns simulation results. If None is passed, the
                simulate() method bound to this object is used instead.
            parallel (bool): flag to control if we run the simulation in parallel. If true, elements of params are
                dispatched to separate threads using the pathos.multiprocessing library. Note that this functionality
                has a few side effects that must be carefully considered. First, some functionality (e.g., warnings)
                does not work in parallel, so sometimes turning parallel mode off can be helpful for troubleshooting.
                Second, randomization should be carefully considered if it plays a crucial role in a simulation.
                Some variants of random number generation/seeding are *not* thread-safe, meaning that you may get the
                same exact (nominally random) result multiple times across threads if random number generation is not
                implemented correctly.
            n_thread (int): number of threads to use in multiprocessing, ignored if parallel is false
            hide_progress (bool): flag to control if we want to display a tqdm progress bar

        Returns:
            results (list, ndarray): list or ndarray of results
        """
        # If runfunc is None, just use simulate() directly
        if runfunc is None:
            runfunc = self.simulate
        # If we pass Parameters object, extract underlying data and discard object shell
        if type(params) is Parameters:
            params = params.params
        # If parallel, set up the pool and run sequence on pool
        if parallel:
            p = ProcessPool(n_thread)
            if type(params) is list:
                results = list(
                    p.imap(
                        runfunc,
                        tqdm(params, disable=hide_progress,
                             total=len(params))))
            elif type(params) is np.ndarray:
                # For array params, we need to flatten the array and then un-flatten it after output
                old_size = params.shape
                params = np.reshape(params, (params.size, ))
                results = list_to_array(
                    list(
                        p.imap(
                            runfunc,
                            tqdm(params,
                                 disable=hide_progress,
                                 total=len(params)))))
                results = np.reshape(results, old_size)
            else:
                raise TypeError('params should be a list or an array')
        # If not parallel, simply iterate over and run each element of the sequence
        else:
            if type(params) is list:
                results = [
                    runfunc(element) for element in tqdm(
                        params, disable=hide_progress, total=len(params))
                ]
            elif type(params) is np.ndarray:
                # For array params, we need to flatten the array and then un-flatten it after output
                old_size = params.shape
                params = np.reshape(params, (params.size, ))
                results = list_to_array(
                    list(
                        map(
                            runfunc,
                            tqdm(params,
                                 disable=hide_progress,
                                 total=len(params)))))
                results = np.reshape(results, old_size)
            else:
                raise TypeError('params should be a list or an array')
        return results
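
A short sketch of the wrapping pattern described in the docstring above (the subclass, its simulate() contract, and the scaling step are illustrative assumptions, not taken from the source):

class ToySimulator(Simulator):
    def simulate(self, params):
        # params is a dict describing a single simulation
        return params["rate"] * params["duration"]

sim = ToySimulator()

def scaled_runfunc(params):
    # extend simulate() rather than replace it, as the docstring recommends
    return 2 * sim.simulate(params)

params = [{"rate": r, "duration": 1.0} for r in (1.0, 2.0, 3.0)]
# parallel=False keeps the sketch independent of pickling the local runfunc
results = sim.run(params, runfunc=scaled_runfunc, parallel=False)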
Example #15
    def _find_path_internal(self) -> None:
        #print(datetime.datetime.now(), ' Line 68')
        kernels: List[Tuple[int, AlgorithmRunner]] = list(map(lambda kernel: (kernel[0],
                                                                              self._services.algorithm.get_new_runner(
                                                                                  copy.deepcopy(self._get_grid()),
                                                                                  OnlineLSTM, ([],
                                                                                               {"max_it": self._max_it,
                                                                                                "load_name": kernel[
                                                                                                    1]}),
                                                                                  BasicTesting,
                                                                                  with_animations=True)),
                                                              enumerate(self.kernel_names)))
        #print('Kernels ', kernels)
        
        #TODO: HANGS here
        # print('Line 86')
        if self._threaded:
            # threaded_jobs: List[Process] = list(
            #     map(lambda kernel: multip.Process(target=kernel[1].find_path, daemon=True), kernels))
            
            # print(datetime.datetime.now(), ' Line 85')
            # print('Threaded Jobs: ', threaded_jobs)
            #multip.set_start_method('spawn')
            # print('Kernals \n \n', kernels)
            print("Parallel")
            # Dispatch each kernel's path search to a worker process and
            # drain the iterator so every search completes before moving on.
            pool = ProcessPool(nodes=10)
            returned_results = list(pool.imap(self.kernels_cal, kernels))

            # for j in threaded_jobs:
            #     i+=1 
            #     print('\n Started # ',i, j)
            #     j.start()
            #     # j.join()
            # i = 0
            # for j in threaded_jobs:
            #     i+=1
            #     print('\n Joined # ',i, j)
            #     j.join()
        else: #It goes here #TODO: Figure out why it hangs
            #print('Kernels: ',kernels) #iterates through 10 kernels (max it = 10)
            for k in kernels:
                #print('kernel is ', k) #Kernel is tuple with a number (0-10) and the algorithm ()?
                self.__active_kernel = k[1] #Problem is next three lines
                # self.t1 = datetime.datetime.now()
                k[1].find_path() #HANGS HERE!!! This takes 0.2 seconds
                # self.t2 = datetime.datetime.now()
                # print('Time: ', (self.t2-self.t1))
                self.__total_path = self.__total_path.union(set(map(lambda el: el.position, k[1].map.trace)))
        # print(datetime.datetime.now(), ' Line 96')
        # print('Gets to line 114')
        self.__active_kernel = None

        # check if any found path and if they did take smallest dist
        best_kernels: List[Tuple[int, AlgorithmRunner]] = []

        for kernel in kernels:
            if kernel[1].map.is_goal_reached(kernel[1].map.agent.position):
                best_kernels.append(kernel)
        #print(datetime.datetime.now(), ' Line 106')

        # take smallest dist kernel if any
        dist: float = float("inf")
        best_kernel: Tuple[int, AlgorithmRunner] = None
        for kernel in best_kernels:
            if dist > len(kernel[1].map.trace):
                dist = len(kernel[1].map.trace)
                best_kernel = kernel
        # print(datetime.datetime.now(), ' Line 115')
        if best_kernel:
            best_kernel[1].map.replay_trace(self.__replay)
        else:
            # pick the one with furthest progress
            dist = -1
            best_kernel = None
            #print('Kernels', kernels)
            for kernel in kernels:
                #print('Kernel', kernel)
                if dist < len(kernel[1].map.trace):
                    dist = len(kernel[1].map.trace)
                    best_kernel = kernel
            # print(datetime.datetime.now(), ' Line 126')
            best_kernel[1].map.replay_trace(self.__replay)
        self.kernel_call_idx = best_kernel[0]
Example #16
    critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')

    cg = CellGroup()

    for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
        print(f'Generation {gen}')

        print('Running Games...')

        weights = pickle.dumps(critic.get_weights())

        cs = list()

        with tqdm.tqdm(total=GAME_PER_GEN) as pbar:
            for i, dat in enumerate(
                    pool.imap(run_game,
                              itertools.repeat(weights, GAME_PER_GEN))):
                cs.extend(dat)
                pbar.update()

        print('Running Games Complete.')
        print('Processing Data...')

        if len(cg.cs) == NUM_REPLAY_BUF:
            cg.pop()

        cg.add(cs)

        total = cg.cl + 1

        dat = MyDataset(cg.s, cg.ss, cg.r, cg.a, total,
                        [(*NUM_GRID, NUM_CHANNEL), (1, ), (1, ), (1, )]).new()
Example #17
if __name__ == "__main__":

    def process_updates(x):
        from API_TTERMS import getconf2, query, testingKWT
        # tid = x[0]
        tid = 428
        testingKWT(tid, '144.167.35.89')

    conf = getconf2()
    q_trackers = f"select tid from trackers where userid = '*****@*****.**' or YEAR(date_created) in (2019,2020)"
    # q_trackers = f"select t.tid from trackers t left join tracker_keyword tk on  t.tid = tk.tid where t.tid is null or tk.tid is null or tk.status_percentage < 100 or tk.status != 1 or tk.status_percentage is null or tk.status is null"

    tracker_result = query(conf, q_trackers)
    if parallel:
        process_pool = ProcessPool(num_processes)
        for record in tqdm(process_pool.imap(process_updates, tracker_result),
                           desc="Terms",
                           ascii=True,
                           file=sys.stdout,
                           total=len(tracker_result)):
            pass
        process_pool.close()
        print("Joining pool")
        process_pool.join()
        print("Clearing pool")
        process_pool.clear()
        print("Finished!")
    else:
        for x in tqdm(tracker_result,
                      desc="Terms",
                      ascii=True,
Example #18
        # cursor.execute("select tid from trackers where tid not in (select tid from tracker_keyword)")
        records = cursor.fetchall()
    connection.close()

    if parallel_main:
        # pool = Pool(int(6))
        # pool.map(process_updates, records)
        def process_updates(x):
            from Utils.functions import clean_text, getconf2, updateStatus, getTopKWS, single_process, testingKWT
            num_processes = 24
            update__status = True
            parallel = False
            tid = x['tid']
            print(tid)
            # tid = 424
            testingKWT(tid, '144.167.35.89', parallel, update__status,
                       num_processes)

        process_pool = ProcessPool(num_processes_main)
        pbar = tqdm(process_pool.imap(process_updates, records),
                    desc="Terms_",
                    ascii=True,
                    file=sys.stdout,
                    total=len(records))
        for x in pbar:
            pbar.update(1)
    else:
        for x in tqdm(records, desc="Terms", ascii=True, file=sys.stdout):
            print(x)
            process_updates(x)
Example #19
    def generate(self, number: int, method: str = 'poisson',
                 regenerate: bool = False) -> None:
        """Generates an ensemble of matrices and estimates standard deviation

        Perturbs the initial raw matrix using either a Gaussian or Poisson
        process, unfolds them and applies the first generation method to them.
        Uses the variation to estimate standard deviation of each step.

        Args:
            number: The number of perturbed matrices to generate.
            method: The stochastic method to use to generate the perturbations
                Can be 'gaussian' or 'poisson'.
            regenerate: Whether to use already generated files (False) or
                generate them all anew (True).
        """
        assert self.raw is not None, "Set the raw matrix"
        assert self.unfolder is not None, "Set unfolder"
        assert self.first_generation_method is not None, \
            "Set first generation method"

        self.size = number
        self.regenerate = regenerate

        LOG.info(f"Start normalization with {self.nprocesses} cpus")
        pool = ProcessPool(nodes=self.nprocesses)
        ss = np.random.SeedSequence(self.seed)
        iterator = pool.imap(self.step, range(number), ss.spawn(number),
                             repeat(method))
        ensembles = np.array(list(tqdm(iterator, total=number)))
        pool.close()
        pool.join()
        pool.clear()

        raw_ensemble = ensembles[:, 0, :, :]
        unfolded_ensemble = ensembles[:, 1, :, :]
        firstgen_ensemble = ensembles[:, 2, :, :]

        # TODO Move this to a save step
        self.raw.save(self.path / 'raw.npy')
        # saving for firstgen is in step due to pickling
        self.firstgen = Matrix(path=self.path / 'firstgen.npy')

        # Calculate standard deviation
        raw_ensemble_std = np.std(raw_ensemble, axis=0)
        raw_std = Matrix(raw_ensemble_std, self.raw.Eg, self.raw.Ex,
                         state='std')
        raw_std.save(self.path / "raw_std.npy")

        unfolded_ensemble_std = np.std(unfolded_ensemble, axis=0)
        unfolded_std = Matrix(unfolded_ensemble_std, self.raw.Eg,
                              self.raw.Ex, state='std')
        unfolded_std.save(self.path / "unfolded_std.npy")

        firstgen_ensemble_std = np.std(firstgen_ensemble, axis=0)
        firstgen_std = Matrix(firstgen_ensemble_std, self.firstgen.Eg,
                              self.firstgen.Ex, state='std')
        firstgen_std.save(self.path / "firstgen_std.npy")

        self.std_raw = raw_std
        self.std_unfolded = unfolded_std
        self.std_firstgen = firstgen_std

        self.raw_ensemble = raw_ensemble
        self.unfolded_ensemble = unfolded_ensemble
        self.firstgen_ensemble = firstgen_ensemble
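
A minimal call sketch, assuming an ensemble-like object whose raw matrix, unfolder, and first-generation method have been set as the asserts above require (the variable name and worker count are illustrative):

ensemble.nprocesses = 4   # worker processes handed to ProcessPool
ensemble.generate(number=50, method='poisson', regenerate=True)
# standard deviations of each stage are then available, e.g.:
print(ensemble.std_firstgen)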
Example #20
    def load_scenario_cards(url):
        """
        Create and display html cards with scenario's kpi for
        the 15 first scenarios using cache file.
        """
        cards_list = []
        cards_count = 0
        episode_graph_layout = {
            "autosize": True,
            "showlegend": False,
            "xaxis": {
                "showticklabels": False
            },
            "yaxis": {
                "showticklabels": False
            },
            "margin": {
                "l": 0,
                "r": 0,
                "t": 0,
                "b": 0
            },
        }

        url_split = url.split("/")
        url_split = url_split[len(url_split) - 1]

        is_episode_page = url_split == "/" or url_split == "" or url_split == "episodes"
        start_time = time.time()
        if cards_count < 15 and is_episode_page:
            sorted_scenarios = list(sorted(scenarios))
            if not os.path.exists(cache_dir):
                print(
                    "Starting Multiprocessing for reading the best agent of each scenario"
                )
                pool = ProcessPool(n_cores)
                best_agents_data = list(
                    pool.imap(
                        make_episode_without_decorate,
                        [
                            best_agents[scenario]["agent"]
                            for scenario in sorted_scenarios
                        ],
                        sorted_scenarios,
                    ))
                pool.close()
                print("Multiprocessing done")
                for i, scenario in enumerate(sorted_scenarios):
                    best_agent_episode = best_agents_data[i]
                    episode_data = retrieve_episode_from_disk(
                        best_agent_episode.episode_name,
                        best_agent_episode.agent)
                    best_agent_episode.decorate(episode_data)
                    save_in_ram_cache(
                        best_agent_episode.episode_name,
                        best_agent_episode.agent,
                        best_agent_episode,
                    )

            for i, scenario in enumerate(sorted_scenarios):
                best_agent_episode = make_episode(
                    best_agents[scenario]["agent"], scenario)
                prod_share = EpisodeTrace.get_prod_share_trace(
                    best_agent_episode)
                consumption = best_agent_episode.profile_traces
                cards_list.append(
                    dbc.Col(
                        id=f"card_{scenario}",
                        lg=4,
                        width=12,
                        children=[
                            dbc.Card(
                                className="mb-3",
                                children=[
                                    dbc.CardBody([
                                        html.H5(
                                            className="card-title",
                                            children="Scenario {0}".format(
                                                scenario),
                                        ),
                                        dbc.Row(children=[
                                            dbc.Col(
                                                className="mb-4",
                                                children=[
                                                    html.P(
                                                        className=
                                                        "border-bottom h3 mb-0 text-right",
                                                        children=best_agents[
                                                            scenario]
                                                        ["out_of"],
                                                    ),
                                                    html.P(
                                                        className="text-muted",
                                                        children=
                                                        "Agents on Scenario",
                                                    ),
                                                ],
                                            ),
                                            dbc.Col(
                                                className="mb-4",
                                                children=[
                                                    html.P(
                                                        className=
                                                        "border-bottom h3 mb-0 text-right",
                                                        children="{}/{}".
                                                        format(
                                                            best_agents[
                                                                scenario]
                                                            ["value"],
                                                            meta_json[scenario]
                                                            ["chronics_max_timestep"],
                                                        ),
                                                    ),
                                                    html.P(
                                                        className="text-muted",
                                                        children=
                                                        "Agent's Survival",
                                                    ),
                                                ],
                                            ),
                                            dbc.Col(
                                                className="mb-4",
                                                children=[
                                                    html.P(
                                                        className=
                                                        "border-bottom h3 mb-0 text-right",
                                                        children=
                                                        f'{round(best_agents[scenario]["cum_reward"]):,}',
                                                    ),
                                                    html.P(
                                                        className="text-muted",
                                                        children=
                                                        "Cumulative Reward",
                                                    ),
                                                ],
                                            ),
                                            dbc.Col(
                                                className="mb-4",
                                                children=[
                                                    html.P(
                                                        className=
                                                        "border-bottom h3 mb-0 text-right",
                                                        children="{} min".
                                                        format(
                                                            round(
                                                                best_agent_episode
                                                                .
                                                                total_maintenance_duration
                                                            )),
                                                    ),
                                                    html.P(
                                                        className="text-muted",
                                                        children=
                                                        "Total Maintenance Duration",
                                                    ),
                                                ],
                                            ),
                                        ]),
                                        dbc.Row(
                                            className="align-items-center",
                                            children=[
                                                dbc.Col(
                                                    lg=4,
                                                    width=12,
                                                    children=[
                                                        html.H5(
                                                            "Production Share",
                                                            className=
                                                            "text-center",
                                                        ),
                                                        dcc.Graph(
                                                            style={
                                                                "height":
                                                                "150px"
                                                            },
                                                            figure=go.Figure(
                                                                layout=
                                                                episode_graph_layout,
                                                                data=prod_share,
                                                            ),
                                                        ),
                                                    ],
                                                ),
                                                dbc.Col(
                                                    lg=8,
                                                    width=12,
                                                    children=[
                                                        html.H5(
                                                            "Consumption Profile",
                                                            className=
                                                            "text-center",
                                                        ),
                                                        dcc.Graph(
                                                            style={
                                                                "height":
                                                                "150px"
                                                            },
                                                            figure=go.Figure(
                                                                layout=
                                                                episode_graph_layout,
                                                                data=
                                                                consumption,
                                                            ),
                                                        ),
                                                    ],
                                                ),
                                            ],
                                        ),
                                    ]),
                                    dbc.CardFooter(
                                        dbc.Button(
                                            "Open",
                                            id=scenario,
                                            key=scenario,
                                            className="btn-block",
                                            style={
                                                "background-color": "#2196F3"
                                            },
                                        )),
                                ],
                            )
                        ],
                    ))
                cards_count += 1
        print(
            "Initial loading time for the best agent of all scenarios = {:.1f} seconds"
            .format(time.time() - start_time))
        return cards_list
Example #21
import time
from pathos.multiprocessing import ProcessPool

# instantiate and configure the worker pool
pool = ProcessPool(nodes=3)

print "- Do a blocking (=synchronous) map on the chosen function"
print(pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

print "- Do a non-blocking (=asynchronous) map, then get the results"
results = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
while not results.ready():
    time.sleep(1)
    print(".")
print(results.get())

print "- Do a non-blocking (=asynchronous) map, then extract the results from the iterator"
results = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
print("...")
print(list(results))

print "- Do one item at a time, using a pipe"
print(pool.pipe(pow, 1, 5))
print(pool.pipe(pow, 2, 6))

print "- Do one item at a time, using a non-blocking (=asynchronous) pipe"
result1 = pool.apipe(pow, 1, 5)
result2 = pool.apipe(pow, 2, 6)
print(result1.get())
print(result2.get())
Example #22
'''
Variation pathos, local method, iterated imap

Windows OS: Hangs/ multiprocess error with newer version of pathos
Mac OS:
Linux:
Debian Hangs

Cloud-based:
Repl.it: Works
Ideone.com: Fails-multiprocess error
'''

from multiprocess import freeze_support
from pathos.multiprocessing import ProcessPool


def f(vars):
    return vars[0]**vars[1]


if __name__ == "__main__":
    freeze_support()

    pool = ProcessPool(4)
    print("start")
    for run in pool.imap(f, [(1, 5), (2, 8), (3, 9)]):
        print(run)
Example #23
    def testingKWT(self, tid, ip, parallel, update__status, num_processes):
        conf = getconf2()
        s = SqlFuncs(conf)

        # Get blogsites in tracker
        connection = s.get_connection(conf)
        with connection.cursor() as cursor:
            cursor.execute(f"""select * from trackers where tid = {tid}""")
            records = cursor.fetchall()
            if records:
                query = records[0]['query']
                if 'blogsite_id in (' in query:
                    blog_ids = query[query.find("(") + 1:query.find(")")]
                    if blog_ids and 'NaN' not in blog_ids:
                        blog_ids = blog_ids[:-1] if ',' == blog_ids[
                            -1] else blog_ids

                        cursor.execute(
                            f"""select post from blogposts where blogsite_id in ({blog_ids})"""
                        )
                        records = cursor.fetchall()

                        # Get top terms from blog_ids
                        try:
                            terms_result = getTopKWS(blog_ids)
                        except Exception as e:
                            print(e)
                            try:
                                print('Retrying...')
                                terms_result = getTopKWS(blog_ids)
                                print('success')
                            except Exception as e:
                                terms_result = []
                                print(e)

                        # Count terms and group by year
                        if terms_result:
                            data_ = []
                            for term in terms_result:
                                PARAMS = term, blog_ids, tid
                                data_.append(PARAMS)

                            if parallel:
                                print("starting multi-process")
                                process_pool = ProcessPool(num_processes)
                                pbar = tqdm(process_pool.imap(
                                    single_process, data_),
                                            desc="Terms",
                                            ascii=True,
                                            file=sys.stdout,
                                            total=len(data_))
                                for x in pbar:
                                    pbar.update(1)

                                    # Update status on DB
                                    if update__status:
                                        status = round(
                                            (pbar.last_print_n / len(data_)) *
                                            100)
                                        if status <= 99 and status >= 90:
                                            status = 100
                                        updateStatus(status, tid)

                                process_pool.close()
                                print("Joining pool")
                                process_pool.join()
                                print("Clearing pool")
                                process_pool.clear()
                                print("Finished!")

                            else:
                                for x in data_:
                                    single_process(x)