Code Example #1
    def store_atom_enumeration(self, filename=None, multithread=False,
                               max_candidates=1):
        self.filename = filename
        DB = PrototypeSQL(filename=filename)
        DB._connect()
        N0 = DB.ase_db.count()

        prototypes = DB.select(max_atoms=self.max_atoms,
                               spacegroups=self.spacegroups,
                               source='prototype')
        Nprot = len(prototypes)

        pool = Pool()

        t0 = time.time()
        if multithread:
            res = pool.amap(self.store_atoms_for_prototype, prototypes)
            while not res.ready():
                N = DB.ase_db.count() - N0
                t = time.time() - t0
                N_per_t = N / t
                if N > 0:
                    print('---------------------------------')
                    print(
                        "{}/{} structures generated in {:.2f} sec".format(N, Nprot, t))
                    print("{} sec / structure".format(t / N))
                    print('Estimated time left: {:.2f} min'.format(
                        Nprot / N_per_t / 60))
                print('---------------------------------')
                time.sleep(10)
            res = res.get()
        else:
            for prototype in prototypes:
                self.store_atoms_for_prototype(prototype)
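The progress loop above boils down to a reusable pattern: start the asynchronous map, poll res.ready(), and report timing until the results can be collected. Below is a self-contained sketch of that pattern with a dummy slow_square task; the prototype and database objects of the original snippet are project-specific and are not reproduced here.

import time
from pathos.multiprocessing import ProcessingPool as Pool

def slow_square(x):
    time.sleep(0.1)   # stand-in for real work
    return x * x

if __name__ == '__main__':
    pool = Pool(4)
    t0 = time.time()
    res = pool.amap(slow_square, range(100))   # asynchronous map, returns a handle
    while not res.ready():
        print('still running, {:.1f} s elapsed'.format(time.time() - t0))
        time.sleep(1)
    values = res.get()                         # collect results once ready
    pool.close()
    pool.join()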
Code Example #2
 def fit(self, train_feats, train_labels, feat_names=None):
     if self.__feature_names is None:
         self.__feature_names = feat_names
     self.__train_features, self.__train_labels = train_feats, train_labels
     #self.__build_forest2()
     pool = Pool(cpu_count())
     results = pool.amap(self.__build_forest, range(self.__n_estimators))
     self.__trees = results.get()
Code Example #3
    def predict(self, test_features):
        if self.__train_features is None or self.__train_labels is None:
            raise Exception("Training Data not fitted.")

        self.__test_features = list(test_features)
        pool = Pool(cpu_count())
        self.__predictions = pool.amap(self.__k_nearest_neighbours, self.__test_features).get()
        return self.__predictions
Code Example #4
File: dataframe.py Project: airysen/pandarallel
        def closure(df, func, *args, **kwargs):
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (ProgressBarsNotebookLab
                            if in_notebook_lab else ProgressBarsConsole)

            axis = kwargs.get("axis", 0)
            if axis == "index":
                axis = 0
            elif axis == "columns":
                axis = 1

            opposite_axis = 1 - axis
            chunks = chunk(df.shape[opposite_axis], nb_workers)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(df)

            workers_args = [(
                plasma_store_name,
                object_id,
                chunk,
                func,
                display_progress_bar,
                queue,
                index,
                args,
                kwargs,
            ) for index, chunk in enumerate(chunks)]

            result_workers = pool.amap(DataFrame.worker_apply, workers_args)

            if display_progress_bar:
                while not all(finished):
                    for _ in range(finished.count(False)):
                        index, value, status = queue.get()
                        values[index] = value
                        finished[index] = status

                    progress_bar.update(values)

            result = pd.concat(
                [
                    plasma_client.get(result_worker)
                    for result_worker in result_workers.get()
                ],
                copy=False,
            )

            return result
Code Example #5
def test_multiprocess():
    x_list = [1,2,3,4,5,6,7,]
    y_list = ['1','2','3','4','5','6','7']
    epoch = 8
    pool = Pool(epoch)
    res = pool.amap(test_task,x_list,y_list)
    pool.pipe(test_task,'22','222')
    pool.close()
    pool.join()
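The snippet above starts an asynchronous map and a blocking pipe call but never reads the amap result. A minimal runnable variant of the same pattern is sketched below; test_task is a hypothetical two-argument stand-in, since the original function is not included in the excerpt.

from pathos.multiprocessing import ProcessingPool as Pool

def test_task(x, y):
    # hypothetical stand-in for the original test_task
    return '{}-{}'.format(x, y)

if __name__ == '__main__':
    x_list = [1, 2, 3, 4, 5, 6, 7]
    y_list = ['1', '2', '3', '4', '5', '6', '7']
    pool = Pool(4)
    res = pool.amap(test_task, x_list, y_list)   # asynchronous map over paired lists
    single = pool.pipe(test_task, '22', '222')   # blocking call for a single task
    print(res.get())                             # collect the asynchronous results
    print(single)
    pool.close()
    pool.join()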
Code Example #6
        def closure(rolling, func, *args, **kwargs):
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (ProgressBarsNotebookLab
                            if in_notebook_lab else ProgressBarsConsole)

            series = rolling.obj
            window = rolling.window
            chunks = chunk(len(series), nb_workers, window)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(series)

            attribute2value = {
                attribute: getattr(rolling, attribute)
                for attribute in rolling._attributes
            }

            workers_args = [(plasma_store_name, object_id, chunk, func,
                             display_progress_bar, queue, index,
                             attribute2value, args, kwargs)
                            for index, chunk in enumerate(chunks)]

            result_workers = pool.amap(SeriesRolling.worker, workers_args)

            if display_progress_bar:
                while not all(finished):
                    for _ in range(finished.count(False)):
                        index, value, status = queue.get()
                        values[index] = value
                        finished[index] = status

                    progress_bar.update(values)

            result = pd.concat([
                plasma_client.get(result_worker)
                for result_worker in result_workers.get()
            ],
                               copy=False)

            return result
Code Example #7
File: pathos_pool.py Project: CGCL-codes/naturalcc
class PPool:
    """pathos multi-processing pool"""

    def __init__(self, processor_num: int = None, ):
        self.processor_num = cpu_count() if processor_num is None \
            else min(processor_num, cpu_count())
        LOGGER.debug('Building Pathos multi-processing pool with {} cores.'.format(self.processor_num))
        self._pool = Pool(self.processor_num)

    def flatten_params(self, params: List):
        """params: List[*args, **kwargs]"""
        # block_size = int(math.ceil(len(params) / self.processor_num))
        # block_num = int(math.ceil(len(params) / block_size))
        block_size = (len(params) + self.processor_num - 1) // self.processor_num
        block_num = (len(params) + block_size - 1) // block_size
        block_params = [params[i * block_size:(i + 1) * block_size] for i in range(block_num)]
        return block_params

    def close(self):
        self._pool.close()
        self._pool.join()
        self._pool.clear()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def feed(self, func: Any, params: List, one_params: bool = False) -> List[Any]:
        if one_params:
            result = self._pool.amap(func, params).get()
        else:
            params = tuple(zip(*params))
            result = self._pool.amap(func, *params).get()
        return result
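A brief usage sketch for the PPool wrapper above, assuming the class and its imports from the original file are available; the add helper and the sample parameters are invented purely for illustration.

def add(x, y):
    return x + y

if __name__ == '__main__':
    with PPool(processor_num=2) as pool:
        # one_params=False (default): each (x, y) tuple is unpacked into add(x, y)
        print(pool.feed(add, [(1, 2), (3, 4), (5, 6)]))        # -> [3, 7, 11]
        # one_params=True: each element is passed to the function as-is
        print(pool.feed(abs, [-1, -2, -3], one_params=True))   # -> [1, 2, 3]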
Code Example #8
    def start(self, text_data_dir, res_dir, nprocs=8):
        '''
        entry function
    
        text_data_dir: folder of raw data
        res_dir: folder of output
        nprocs: number of worker processes used in parallel
        '''
        p = PathosPool(nprocs)

        filepathsvec, filenamesvec, respaths = list(), list(), list()
        for dirpath, _, filenames in os.walk(text_data_dir):
            for filename in filenames:
                if (("gz" in filename) and ('md5' not in filename)
                        and ('copy' not in filename)):
                    filepath = os.path.join(dirpath, filename)
                    print(filepath)
                    res_name = filename.split(".")[0] + ".csv.gz"
                    respath = os.path.join(res_dir, res_name)
                    #if os.path.exists(respath):
                    # pass
                    #else:
                    if True:
                        filepathsvec.append(filepath)
                        filenamesvec.append(filename)
                        respaths.append(respath)
                        #p.apply_async(process_data, args = (filepath,filename,
                        # respath, True,
                        # [title_stop_path,
                        #  affil_stop_path,
                        #  mesh_stop_path]))
        self.affildicts = p.amap(
            partial(self.process_data,
                    stop_paths=[
                        self.title_stop_path, self.affil_stop_path,
                        self.mesh_stop_path
                    ],
                    rm_stopwords=True,
                    affiliation_correction=True,
                    select_journals=self.select_journals), filepathsvec,
            filenamesvec, respaths)

        p.close()
        p.join()  # Having an issue joining
        print("joined")
        p.clear()  # Delete the pool
Code Example #9
File: BD_Shift.py Project: nick-youngblut/SIPSim
def main(args):
    """Main function for calculating BD shift.

    Parameters
    ----------
    args : dict
        See ``BD_shift`` subcommand
    """
    sys.stderr.write('Loading KDE objects...\n')
    kde1 = Utils.load_kde(args['<kde1>'])
    kde2 = Utils.load_kde(args['<kde2>'])

    # adding top-level library ID if not present
    kde1 = kde_add_lib(kde1)
    kde2 = kde_add_lib(kde2)

    sys.stderr.write('Calculating BD shifts...\n')
    print('\t'.join(['lib1', 'lib2', 'taxon', 'BD_shift']))
    for libID1,d1 in kde1.items():
        for libID2,d2 in kde2.items():
            msg = '  Comparing libraries: "{}", "{}"\n'
            sys.stderr.write(msg.format(libID1, libID2))

            # overlap of taxa btw libraries
            taxa = taxon_overlap(d1, d2)            

            # calculating BD shift (in parallel)
            pfunc = partial(kde_intersect, 
                            start=float(args['--start']),
                            end=float(args['--end']),
                            step=float(args['--step']))

            pool = ProcessingPool(nodes=int(args['--np']))
            if args['--debug']:
                res = map(pfunc, [(taxon, d1[taxon], d2[taxon])
                                  for taxon in taxa])
            else:
                res = pool.amap(pfunc, [(taxon, d1[taxon], d2[taxon])
                                        for taxon in taxa])
                while not res.ready():
                    time.sleep(2)
                res = res.get()        
                            
            # writing out table
            for line in res:
                print('\t'.join([libID1, libID2] +
                                [str(x) for x in line]))
Code Example #10
File: multithread.py Project: wgosal/signalAlign
def run_service3(service,
                 iterable,
                 iterable_arguments,
                 iterable_argument_names,
                 worker_count,
                 log_function=print):

    start = timer()
    args = list(iterable_arguments.keys())
    args.extend(iterable_argument_names)
    if log_function is not None:
        log_function("[run_service] running service {} with {} workers".format(
            service, worker_count))

    # add everything to work queue
    all_args = []
    for x in iterable:
        if type(x) is not tuple:
            x = [x]
        args = dict(dict(zip(iterable_argument_names, x)),
                    **iterable_arguments)
        all_args.append(args)

    pool = Pool(worker_count)
    results = pool.amap(service, all_args)
    final_results = results.get()
    # if example service model is used, metrics can be gathered in this way
    messages = []
    total = len(final_results)
    failure = 0
    for error, mem_usage in final_results:
        if error is not False:
            failure += 1
            if type(error) is str:
                messages.append(error)

    # if we should be logging and if there is material to be logged
    if log_function is not None and (total + failure + len(messages)) > 0:
        log_function(
            "[run_service] Summary {}:\n[run_service]\tTime: {}s\n[run_service]\tTotal: {}\n[run_service]\tFailure: {}"
            .format(service, int(timer() - start), total, failure))
        log_function("[run_service]\tMessages:\n[run_service]\t\t{}".format(
            "\n[run_service]\t\t".join(messages)))

    # return relevant info
    return total, failure, messages
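run_service3 hands each worker a single dict that merges iterable_arguments with the per-item values named in iterable_argument_names, and its summary loop expects every result to be an (error, mem_usage) pair. A hypothetical service with that shape is sketched below; the value and scale keys are illustrative only.

def example_service(args):
    # receives one merged dict of fixed and per-item arguments
    try:
        value = args['value']   # assumed iterable_argument_names entry
        scale = args['scale']   # assumed fixed iterable_arguments entry
        _ = value * scale       # placeholder for real work
        return False, 0         # no error, dummy memory figure
    except Exception as e:
        return str(e), 0

# total, failure, messages = run_service3(
#     example_service, iterable=range(10), iterable_arguments={'scale': 2},
#     iterable_argument_names=['value'], worker_count=4)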
Code Example #11
File: experiment.py Project: hobotrl/hobotrl
class ParallelGridSearch(Experiment):

    param_queue = []

    def __init__(self, exp_class, parameters, parallel=4):
        """
        :param exp_class: subclass of Experiment to run
        :type exp_class: class<Experiment>
        :param parameters: dict of list, experiment parameters to search within, i.e.:
            {
                "entropy": [1e-2, 1e-3],
                "learning_rate": [1e-3, 1e-4],
                ...
            }
            or list of dict-of-list, representing multiple groups of parameters:
            [
            {
                "entropy": [1e-2, 1e-3],
                "learning_rate": [1e-3, 1e-4],
                ...
            },
            {
                "batch_size": [32, 64],
                ...
            }
            ]

        """
        super(ParallelGridSearch, self).__init__()
        self._exp_class, self._parameters, self._parallel = exp_class, parameters, parallel

    def run(self, args):
        self.log_root = args.logdir
        for parameter in GridSearch.product(self._parameters):
            label = GridSearch.labelize(parameter)
            ParallelGridSearch.param_queue.append(
                [self._exp_class, self.log_root, parameter, label, args])
        n = len(ParallelGridSearch.param_queue)
        task_index = list(range(n))
        logging.warning("total searched combination:%s", n)
        self.pool = Pool(self._parallel)
        ret = self.pool.amap(subprocess_run, task_index)
        ret.wait()
        self.pool.close()
        self.pool.join()
Code Example #12
File: process_clueweb12.py Project: clover3/Chair
    def launch_task():
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]

        def list_fn(todo_list) -> PayloadSaver:
            payload_saver = PayloadSaver()
            for doc_id, text in todo_list:
                tokenize_doc_and_save(payload_saver, doc_id, text, tokenize_fn)
            return payload_saver

        from pathos.multiprocessing import ProcessingPool as Pool
        p = Pool(num_thread, daemon=True)

        split_n = int(len(todo_list) / num_thread) + 1
        args = chunks(todo_list, split_n)
        result_handle = p.amap(list_fn, args)
        return result_handle
Code Example #13
class C(object):
    def __init__(self,files):
        self.pool = Pool(4)
        self.files = files

    def raw_processor(self, fi,prefix,somedict):
        df = pd.read_table(
                fi,
                header=None,
                names=['artist_id','ts'],
                parse_dates=['ts'])\
            .sort_values(by='ts')
        user = fi.split('/')[-1][:-4]
        df.to_pickle('/Users/jaredlorince/git/MusicForaging/testData/scrobbles_test/{}_{}.pkl'.format(prefix,user))
        rootLogger.info('preprocessing complete for user {} ({})'.format(user,fi))

    def run_p(self):
        func_partial = partial(self.raw_processor,prefix='blah',somedict=d)
        result = self.pool.amap(func_partial, self.files)
Code Example #14
File: get_sentiment.py Project: yxsu/RRGen
def main():
    output_review = []
    lines = get_data(valid_file)
    pool = Pool(8)
    block_num = 1000
    block_size = len(lines) // block_num

    for i in tqdm(range(block_num + 1)):
        if i == block_num:
            block = lines[i * block_size:]
        else:
            block = lines[i * block_size:(i + 1) * block_size]
        tunnel = pool.amap(sub_process, block)
        output = tunnel.get()
        output_review += output

    fw = open(output_valid, "w")
    fw.writelines(output_review)
    fw.close()
Code Example #15
        def closure(data, func, **kwargs):
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (ProgressBarsNotebookLab
                            if in_notebook_lab else ProgressBarsConsole)

            chunks = chunk(data.size, nb_workers)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(data)

            workers_args = [(plasma_store_name, object_id, chunk, func,
                             display_progress_bar, queue, index, kwargs)
                            for index, chunk in enumerate(chunks)]

            result_workers = pool.amap(Series.worker_map, workers_args)

            if display_progress_bar:
                while not all(finished):
                    for _ in range(finished.count(False)):
                        index, value, status = queue.get()
                        values[index] = value
                        finished[index] = status

                    progress_bar.update(values)

            result = pd.concat([
                plasma_client.get(result_worker)
                for result_worker in result_workers.get()
            ],
                               copy=False)

            return result
Code Example #16
    def generate_prescaled_dataset(self, sizes):
        if not self.prescaled_data: return
        print("Generating prescaled dataset...")
        data_path = self.prescaled_data_path
        if data_path is None: data_path = 'maua/datasets/%s_prescaled'%self.data_path.split('/')[-1]
        if not os.path.isdir(data_path) or \
           not len(self.dataloader)*len(sizes) == len(ProGANDataLoader(data_path=data_path)):
            # create a copy of the dataset on disk for each size
            from pathos.multiprocessing import ProcessingPool
            pool = ProcessingPool()

            def prescale_dataset(tup):
                image_file, size = tup
                try:
                    Image.open(data_path+"/%s/%s"%(size,image_file.split("/")[-1]))
                    return 1
                except:
                    os.makedirs(data_path+"/%s"%size, exist_ok=True)
                    image = Image.open(self.data_path+"/"+image_file)
                    transforms = tn.Compose([self.transforms, tn.Resize(size), tn.ToTensor()])
                    processed = th.clamp(transforms(image), min=0, max=1)
                    save_image(processed, data_path+"/%s/%s"%(size,image_file.split("/")[-1]))
                    return 1

            jobs = list(itertools.product(filter(lambda im: not im.startswith("."), os.listdir(self.data_path)), sizes))
            results = pool.amap(prescale_dataset, jobs)
            time.sleep(1)
            pbar = tqdm.tqdm(total=len(self.dataloader)*len(sizes))
            pbar.set_description("Images processed")
            while not results.ready():
                num_files = sum([len(os.listdir(data_path+"/%s"%size)) for size in sizes])
                pbar.update(num_files - pbar.n)
                time.sleep(1)
            pbar.close()
            pool.close()
            pool.join()
            assert sum(results.get()) == len(self.dataloader)*len(sizes)
        else:
            print("Dataset already generated.")
        self.data_path = data_path
Code Example #17
File: discretize.py Project: LoLab-VU/pydyno
    def get_signatures(self, cpu_cores=1, verbose=False):
        if cpu_cores == 1:
            if self.nsims == 1:
                signatures = self.__signature(self._trajectories,
                                              self.parameters)
                signatures = signatures_to_dataframe(signatures, self.tspan,
                                                     self.nsims)
                signatures = signatures.transpose().stack(0)
                return signatures
            else:
                signatures = [0] * self.nsims
                for idx in range(self.nsims):
                    signatures[idx] = self.__signature(self._trajectories[idx],
                                                       self.parameters[idx])
                signatures = signatures_to_dataframe(signatures, self.tspan,
                                                     self.nsims)
                signatures = signatures.transpose().stack(0)
                return signatures
        else:
            if Pool is None:
                raise Exception(
                    'Please install the pathos package for this feature')
            if self.nsims == 1:
                self._trajectories = [self._trajectories]
                self._parameters = [self._parameters]

            p = Pool(cpu_cores)
            res = p.amap(self.__signature, self._trajectories, self.parameters)
            if verbose:
                while not res.ready():
                    print('We\'re not done yet, %s tasks to go!' %
                          res._number_left)
                    time.sleep(60)
            signatures = res.get()
            signatures = signatures_to_dataframe(signatures, self.tspan,
                                                 self.nsims)
            signatures = signatures.transpose().stack(0)
            return signatures
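The call p.amap(self.__signature, self._trajectories, self.parameters) relies on amap pairing multiple iterables positionally, just as the built-in map does. A minimal standalone check of that behaviour with a dummy label function:

from pathos.multiprocessing import ProcessingPool as Pool

def label(trajectory, parameters):
    return '{} with {}'.format(trajectory, parameters)

if __name__ == '__main__':
    pool = Pool(2)
    out = pool.amap(label, ['traj0', 'traj1'], ['pars0', 'pars1']).get()
    assert out == ['traj0 with pars0', 'traj1 with pars1']
    pool.close()
    pool.join()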
Code Example #18
File: sos.py Project: connorourke/glopty
class SOS:
    def __init__(
        self,
        func,
        bounds,
        niter=500,
        population=10,
        ftol=0.001,
        workers=-1,
        restart=False,
        vec_dump=10,
        seed=None,
        aggressive_parasite=False
    ):
        """ 
        Initialise a symbiotic organisms search instance
        
        Args:
            func (callable): Function to be minimised. f(x, *args) - x is the argument to be minimised, args is a tuple of any additional  fixed parameters to specify the function
            bounds (list(Double)): list of pairs of (min,max) bounds for x
            niter (Int): number of iterations for optimiser
            population (Int): number of members in population
            ftol (Double) : convergence criteria for function
            workers (Int): number of multiprocessing workers to use. -1 sets workers to mp.cpu_count()
            vec_dump (Int): write a restart file every vec_dump steps
            restart (Bool): restart the run from a restart file
            seed (Int): seed for random number generator, useful for tests

        """

        self.function = func
        self.niter = niter
        self.population = population
        self.particles = []
        self.best_global_vec = None
        self.best_global_fit = math.inf
        self.ftol = ftol
        self.bounds = np.asarray(bounds)
        self.restart = restart
        self.vector_restart = VectorInOut(bounds, "sos.rst")
        self.vec_dump = vec_dump
        self.seed = seed
        self.aggressive_parasite = aggressive_parasite

        if workers == -1:
            self.pool = Pool(mp.cpu_count())
        else:
            self.pool = Pool(workers)

    def vector_to_pot(self, vector):
        """
        Converts sos vector to actual x values

        Args:
            vector (numpy array): vector position in parameter space

        """
        return ((self.bounds[:, 1] - self.bounds[:, 0]) * vector) + self.bounds[:, 0]

    def part_init(self, vector):
        """
        Wrapper for particle initialisation for multiprocess
        
        Args:
        
        vector (numpy array)

        Returns: 
        
        vector (numpy array)
        result of function(vector)
        """

        return vector, self.function(self.vector_to_pot(vector), self.args)

    def initialise_particles(self):
        """
        Initialises the population: sets particle vectors using latin hypercube, and sets global bests

        Args:
            None
            
        """

        if self.restart:
            vec, fit = self.vector_restart.read_vectors()
            for i, vec in enumerate(vec):
                self.particles.append(Particle(np.asarray(vec), fit[i], i))

            self.set_global_best()
        else:
            vectors = lhs(len(self.bounds), self.population)
            res = self.pool.amap(self.part_init, vectors)
            for i, val in enumerate(res.get()):
                self.particles.append(Particle(val[0], val[1], i))

            self.best_global_fit = copy.deepcopy(self.particles[0].return_fit)
            self.best_global_vec = copy.deepcopy(self.particles[0].return_vec)

    def set_global_best(self):
        """
        Sets current global best fit for function, and corresponding vector
    
        Args:
            None
        """

        for particle in self.particles:
            if particle.fit < self.best_global_fit:
                self.best_global_fit = copy.deepcopy(particle.return_fit)
                self.best_global_vec = copy.deepcopy(particle.return_vec)
        output("Current best fit:" + str(self.best_global_fit) + "\n")

    def mutualism(self, part):
        """
        Performs mutualism step of sos

        Args:
            part (Particle): particle member of population on which to perform mutualism

        Returns:
            part.vector (np.array): vector position in parameter space
            part.fit    (Double): value of function at point in param space corresponding to part.vector
        """
        np.random.seed()
        b_ind = np.random.choice(
            [i for i in range(self.population) if i != part.index], 1, replace=False
        )[0]
        a = part.vector
        b = self.particles[b_ind].vector
        bf = np.random.randint(1, 3, 2)

        mutant = np.random.rand(len(self.bounds))
        mutual = (a + b) / 2
        new_a = np.clip(a + (mutant * (self.best_global_vec - (mutual * bf[0]))), 0, 1)
        new_b = np.clip(b + (mutant * (self.best_global_vec - (mutual * bf[1]))), 0, 1)

        for i, vec in enumerate([[part.index, new_a], [b_ind, new_b]]):
            trial_pot = self.vector_to_pot(vec[1])
            error = self.function(trial_pot, self.args)
            if error < self.particles[vec[0]].fit:
                self.particles[vec[0]].fit = error
                self.particles[vec[0]].vector = vec[1]

        return part.vector, part.fit

    def run_mutualism(self):
        """
        Wrapper for mutualism step, for multiprocessing

        Args:
            None
        """

        res = self.pool.amap(self.mutualism, self.particles)
        for i, val in enumerate(res.get()):
            self.particles[i].vector, self.particles[i].fit = val

    def commensalism(self, part):
        """
        Performs commensalism step of sos

        Args:
            part (Particle): particle member of population on which to perform commensalism

        Returns:
            part.vector (np.array): vector position in parameter space
            part.fit    (Double): value of function at point in param space corresponding to part.vector
        """
        np.random.seed()
        b_ind = np.random.choice(
            [i for i in range(self.population) if i != part.index], 1, replace=False
        )[0]

        a = part.vector
        b = self.particles[b_ind].vector

        mutant = np.random.uniform(-1, 1, len(self.bounds))
        new_a = np.clip(a + (mutant[0] * (self.best_global_vec - b)), 0, 1)
        trial_pot = self.vector_to_pot(new_a)
        error = self.function(trial_pot, self.args)

        if error < part.fit:
            part.fit = error
            part.vector = new_a

        return part.vector, part.fit

    def run_commensalism(self):
        """
        Wrapper for commensalism step, for multiprocessing

        Args:
            None
        """

        res = self.pool.amap(self.commensalism, self.particles)
        for i, val in enumerate(res.get()):
            self.particles[i].vector, self.particles[i].fit = val

    def parasitism(self, part):
        """
        Performs parasitism step of sos

        Args:
            part (Particle): particle member of population on which to perform parasitism

        Returns:
            part.vector (np.array): vector position in parameter space
            part.fit    (Double): value of function at point in param space corresponding to part.vector
        """
        np.random.seed()        
        b_ind = np.random.choice(
            [i for i in range(self.population) if i != part.index], 1, replace=False
        )[0]
        
        if self.aggressive_parasite:
        
            trial = np.random.uniform(0, 1, len(self.bounds))
            cross_points = np.random.rand(len(self.bounds)) < 0.3
            if not np.any(cross_points):
                cross_points[np.random.randint(0, len(self.bounds))] = True

            
            parasite = np.where(cross_points,trial,part.vector)
        else:
            parasite = copy.deepcopy(part.vector)
            parasite[np.random.randint(0, len(self.bounds))] = np.random.rand()


        trial_pot = self.vector_to_pot(parasite)
        error = self.function(trial_pot, self.args)

        if error < self.particles[b_ind].fit:
            self.particles[b_ind].fit = error
            self.particles[b_ind].vector = parasite

        return b_ind, self.particles[b_ind].fit, self.particles[b_ind].vector

    def run_parasitism(self):
        """
        Wrapper for parasitism step, for multiprocessing

        Args:
            None
        """

        res = self.pool.amap(self.parasitism, self.particles)
        for i, val in enumerate(res.get()):
            self.particles[val[0]].vector, self.particles[val[0]].fit = val[2], val[1]

    def optimise(self, args):
        """
        Optimise the function by running the main optimisation loop.

        Args:
            function (Function): function to optimise
            args (Optional): any further args required by function 

        """

        self.args = args
        self.initialise_particles()

        for step in range(self.niter):
            output("Doing step: " + str(step) + "\n")
            self.run_mutualism()
            self.run_commensalism()
            self.run_parasitism()
            self.set_global_best()
            if self.best_global_fit < self.ftol:
                break
            if step % self.vec_dump == 0:
                output("Going to dump particle vectors\n")
                self.vector_restart.write_vectors(self.particles)

        results_min = OptimizeResult()
        results_min.x = self.vector_to_pot(self.best_global_vec)
        results_min.fun = self.best_global_fit

        self.vector_restart.write_vectors(self.particles)

        return results_min
Code Example #19
class IngestionManagerPandas:
    """Class to manage the multi-threaded data ingestion process.

    This class will manage the data ingestion process which is multi-threaded.

    Attributes:
        feature_group_name (str): name of the Feature Group.
        sagemaker_fs_runtime_client_config (Config): instance of the Config class
            for boto calls.
        data_frame (DataFrame): pandas DataFrame to be ingested to the given feature group.
        max_workers (int): number of threads to create.
        max_processes (int): number of processes to create. Each process spawns
            ``max_workers`` threads.
        profile_name (str): the profile credential should be used for ``PutRecord``
            (default: None).
    """

    feature_group_name: str = attr.ib()
    sagemaker_fs_runtime_client_config: Config = attr.ib()
    max_workers: int = attr.ib(default=1)
    max_processes: int = attr.ib(default=1)
    profile_name: str = attr.ib(default=None)
    _async_result: AsyncResult = attr.ib(default=None)
    _processing_pool: ProcessingPool = attr.ib(default=None)
    _failed_indices: List[int] = attr.ib(factory=list)

    @staticmethod
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        client_config: Config,
        start_index: int,
        end_index: int,
        profile_name: str = None,
    ) -> List[int]:
        """Ingest a single batch of DataFrame rows into FeatureStore.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            client_config (Config): Configuration for the sagemaker feature store runtime
                client to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.
            profile_name (str): the profile credential should be used for ``PutRecord``
                (default: None).

        Returns:
            List of row indices that failed to be ingested.
        """
        retry_config = client_config.retries
        if "max_attempts" not in retry_config and "total_max_attempts" not in retry_config:
            client_config = copy.deepcopy(client_config)
            client_config.retries = {"max_attempts": 10, "mode": "standard"}
        sagemaker_featurestore_runtime_client = boto3.Session(
            profile_name=profile_name).client(
                service_name="sagemaker-featurestore-runtime",
                config=client_config)

        logger.info("Started ingesting index %d to %d", start_index, end_index)
        failed_rows = list()
        for row in data_frame[start_index:end_index].itertuples():
            record = [
                FeatureValue(
                    feature_name=data_frame.columns[index - 1],
                    value_as_string=str(row[index]),
                ) for index in range(1, len(row)) if pd.notna(row[index])
            ]
            try:
                sagemaker_featurestore_runtime_client.put_record(
                    FeatureGroupName=feature_group_name,
                    Record=[value.to_dict() for value in record],
                )
            except Exception as e:  # pylint: disable=broad-except
                logger.error("Failed to ingest row %d: %s", row[0], e)
                failed_rows.append(row[0])
        return failed_rows

    @property
    def failed_rows(self) -> List[int]:
        """Get rows that failed to ingest.

        Returns:
            List of row indices that failed to be ingested.
        """
        return self._failed_indices

    def wait(self, timeout=None):
        """Wait for the ingestion process to finish.

        Args:
            timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
                if timeout is reached.
        """
        try:
            results = self._async_result.get(timeout=timeout)
        except KeyboardInterrupt as i:
            # terminate workers abruptly on keyboard interrupt.
            self._processing_pool.terminate()
            self._processing_pool.close()
            self._processing_pool.clear()
            raise i
        else:
            # terminate normally
            self._processing_pool.close()
            self._processing_pool.clear()

        self._failed_indices = [
            failed_index for failed_indices in results
            for failed_index in failed_indices
        ]

        if len(self._failed_indices) > 0:
            raise IngestionError(
                self._failed_indices,
                f"Failed to ingest some data into FeatureGroup {self.feature_group_name}",
            )

    def _run_multi_process(self,
                           data_frame: DataFrame,
                           wait=True,
                           timeout=None):
        """Start the ingestion process with the specified number of processes.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            wait (bool): whether to wait for the ingestion to finish or not.
            timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
                if timeout is reached.
        """
        # pylint: disable=I1101
        batch_size = math.ceil(data_frame.shape[0] / self.max_processes)
        # pylint: enable=I1101

        args = []
        for i in range(self.max_processes):
            start_index = min(i * batch_size, data_frame.shape[0])
            end_index = min(i * batch_size + batch_size, data_frame.shape[0])
            args += [(
                self.max_workers,
                self.feature_group_name,
                self.sagemaker_fs_runtime_client_config,
                data_frame[start_index:end_index],
                start_index,
                timeout,
                self.profile_name,
            )]

        def init_worker():
            # ignore keyboard interrupts in child processes.
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        self._processing_pool = ProcessingPool(self.max_processes, init_worker)
        self._processing_pool.restart(force=True)

        f = lambda x: IngestionManagerPandas._run_multi_threaded(
            *x)  # noqa: E731
        self._async_result = self._processing_pool.amap(f, args)

        if wait:
            self.wait(timeout=timeout)

    @staticmethod
    def _run_multi_threaded(
        max_workers: int,
        feature_group_name: str,
        sagemaker_fs_runtime_client_config: Config,
        data_frame: DataFrame,
        row_offset=0,
        timeout=None,
        profile_name=None,
    ) -> List[int]:
        """Start the ingestion process.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            row_offset (int): if ``data_frame`` is a partition of a parent DataFrame, then the
                index of the parent where ``data_frame`` starts. Otherwise, 0.
            wait (bool): whether to wait for the ingestion to finish or not.
            timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
                if timeout is reached.
            profile_name (str): the profile credential should be used for ``PutRecord``
                (default: None).

        Returns:
            List of row indices that failed to be ingested.
        """
        executor = ThreadPoolExecutor(max_workers=max_workers)
        # pylint: disable=I1101
        batch_size = math.ceil(data_frame.shape[0] / max_workers)
        # pylint: enable=I1101

        futures = {}
        for i in range(max_workers):
            start_index = min(i * batch_size, data_frame.shape[0])
            end_index = min(i * batch_size + batch_size, data_frame.shape[0])
            futures[executor.submit(
                IngestionManagerPandas._ingest_single_batch,
                feature_group_name=feature_group_name,
                data_frame=data_frame,
                start_index=start_index,
                end_index=end_index,
                client_config=sagemaker_fs_runtime_client_config,
                profile_name=profile_name,
            )] = (start_index + row_offset, end_index + row_offset)

        failed_indices = list()
        for future in as_completed(futures, timeout=timeout):
            start, end = futures[future]
            result = future.result()
            if result:
                logger.error("Failed to ingest row %d to %d", start, end)
            else:
                logger.info("Successfully ingested row %d to %d", start, end)
            failed_indices += result

        executor.shutdown(wait=False)

        return failed_indices

    def run(self, data_frame: DataFrame, wait=True, timeout=None):
        """Start the ingestion process.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            wait (bool): whether to wait for the ingestion to finish or not.
            timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
                if timeout is reached.
        """
        self._run_multi_process(data_frame=data_frame,
                                wait=wait,
                                timeout=timeout)
Code Example #20
def main(uargs):
    """Main function for making OTU table.
    
    Parameters
    ----------
    uargs : dict
        See ``OTU_table`` subcommand.
    """
    # args formatting
    try:
        uargs['--abs'] = int(float(uargs['--abs']))
    except TypeError:
        msg = '"{}" must be float-like'
        raise TypeError(msg.format(uargs['--abs']))

    # logging
    status = Utils.Status(uargs['--quiet'])

    # loading files
    sys.stderr.write('Loading files...\n')
    ## BD kde
    BD_KDE_all = Utils.load_kde(uargs['<BD_KDE>'])
    BD_KDE_all_type = Utils.KDE_type(BD_KDE_all)
    ## community file
    comm_tbl = CommTable.from_csv(uargs['<communities>'], sep='\t')
    comm_tbl.abs_abund = uargs['--abs']
    ## fraction file
    frac_tbl = FracTable.from_csv(uargs['<fractions>'], sep='\t')

    # iter by library:
    sys.stderr.write('Simulating OTUs...\n')
    u_taxon_names = comm_tbl.get_unique_taxon_names()
    OTU_counts = []  # list of all library-specific OTU_count dataframes
    for libID in comm_tbl.iter_libraries():
        sys.stderr.write('Processing library: "{}"\n'.format(libID))

        # dict of KDEs for library (libID)
        BD_KDE = _get_KDEs_for_libID(BD_KDE_all, BD_KDE_all_type, libID)

        # fraction bin list for library
        frac_bins = frac_tbl.BD_bins(libID)
        assert len(frac_bins) > 0, 'No fractions for library "{}"'.format(
            libID)
        libFracBins = [x for x in frac_bins]

        # iter of taxa in parallel
        pfunc = partial(sim_OTU,
                        comm_tbl=comm_tbl,
                        libID=libID,
                        libFracBins=libFracBins,
                        maxsize=int(uargs['--max']))

        pool = ProcessingPool(nodes=int(uargs['--np']))
        if uargs['--debug']:
            ret = map(pfunc, [(i, taxon, BD_KDE[taxon])
                              for i, taxon in enumerate(u_taxon_names)])
        else:
            ret = pool.amap(pfunc, [(i, taxon, BD_KDE[taxon])
                                    for i, taxon in enumerate(u_taxon_names)])
            while not ret.ready():
                time.sleep(2)
            ret = ret.get()

        # converting to a pandas dataframe
        df = pd.DataFrame([x[1] for x in ret]).fillna(0)
        df['taxon'] = [x[0] for x in ret]
        df = pd.melt(df, id_vars=['taxon'])
        df.columns = ['taxon', 'fraction', 'count']
        df['library'] = libID
        x = df['fraction'].apply(_get_BD_range).apply(pd.Series)
        x.columns = ['BD_min', 'BD_mid', 'BD_max']
        df = pd.concat([df, x], axis=1)
        df = df[[
            'library', 'taxon', 'fraction', 'BD_min', 'BD_mid', 'BD_max',
            'count'
        ]]
        df.sort_values(by=['taxon', 'fraction'], inplace=True)

        # Adding to dataframe of all libraries
        OTU_counts.append(df)

    # combining library-specific dataframes
    df_comb = pd.concat(OTU_counts, ignore_index=False)

    # calculating taxon relative abundances
    df_comb['count'] = df_comb['count'].astype('int')
    cols = ['library', 'fraction']
    df_comb['rel_abund'] = df_comb.groupby(cols).transform(tss)['count']

    # writing out long form of table
    df_comb.sort_values(by=['library', 'taxon', 'BD_mid'], inplace=True)
    df_comb.to_csv(sys.stdout, sep='\t', index=False)
Code Example #21
File: testing.py Project: yashkp1234/DataScience
if __name__ == '__main__':
    import time

    start = time.time()
    threads = []
    results = []
    num = 10000

    start = time.time()
    results = []
    res = [square(x, results) for x in range(num)]
    print(results)
    end = time.time()
    print(end - start)

    start = time.time()

    with Pool(5) as p:
        rez = []
        results = p.amap(f, range(num))
        results = results.get()

    print(results)
    end = time.time()
    print(end - start)

Code Example #22
        regex_search.save_regex(args.save_regex[0])
    elif args.existing_regex is not None:
        regex_search.load_regex(args.existing_regex[0])
    print("Loaded %d regex in %d seconds!" % (len(regex_search.regex), time.time() - start_regex_time))
    start_regex_time = None
    logging.info("Starting searches")

    #results = search_regex(regex_worker, regex_res, fastq_file, args.cpus)
    if __name__ == '__main__':
        result_1 = []
        output_1 = []

        if args.multithread is True:
            # errors = Queue()
            p = Pool(nodes=args.cpus[0])
            res = p.amap(regex_worker_multithread, fastq_file.sequences)
            count = 0
            while not res.ready():
                count += 2
                print("\rWaiting. Timer: %d" % count, end='')
                time.sleep(2)
            # for e in errors.get():
            #     print(e)
            # p.terminate()
            result_1 = res.get()
            print("\nDone searching")

        elif args.multithread is False:
            num_seq = 0
            for seq in fastq_file.sequences:
                num_seq += 1
Code Example #23
    clus.diss_matrix(n_jobs=cpus)
    sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 31), n_jobs=4, random_state=1234)
    if sil_threshold:
        silh_diff = sil_df['cluster_silhouette'].max() - sil_threshold
        # Define n_clus to have the minimum number of clusters when silh scores are too similar
        best_silhs = sil_df.loc[sil_df['cluster_silhouette'] > silh_diff]
        best_silh, n_clus = best_silhs.loc[best_silhs['num_clusters'].idxmin()]
    else:
        best_silh, n_clus = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]
    n_clus = int(n_clus)
    clus.spectral_clustering(n_clusters=n_clus, n_jobs=4, random_state=1234)
    cluster_information = {signatures_idx: clus.cluster_percentage_color(),
                           'best_silh': best_silh, 'labels': clus.labels}
    return cluster_information


drivers = list(all_signatures.keys())
drivers.remove('species_combinations')

drivers_to_analyze = []
for dr in drivers:
    if len(all_signatures['species_combinations'][dr]['products'][1]) > 1:
        drivers_to_analyze.append(dr)

p = Pool(cpus)
res = p.amap(cluster_percentage_color_aggomerative, drivers_to_analyze)
results = res.get()

with open('cluster_info_agglomerative_pydream_consumption.pickle', 'wb') as fp:
    pickle.dump(results, fp)
Code Example #24
File: test_mp.py Project: duncandrennan/pathos
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2014 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

# instantiate and configure the worker pool
from pathos.multiprocessing import ProcessingPool
pool = ProcessingPool(nodes=4)

_result = list(map(pow, [1,2,3,4], [5,6,7,8]))  # serial reference result

# do a blocking map on the chosen function
result = pool.map(pow, [1,2,3,4], [5,6,7,8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1,2,3,4], [5,6,7,8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1,2,3,4], [5,6,7,8])
result = result_queue.get()
assert result == _result
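The same pool also offers an asynchronous single-call variant and an unordered non-blocking map; a short sketch using only documented pathos pool methods:

# do an asynchronous single call, then get the result
apipe_result = pool.apipe(pow, 2, 10)
assert apipe_result.get() == 1024

# do an unordered non-blocking map; results may arrive in any order
unordered = pool.uimap(pow, [1,2,3,4], [5,6,7,8])
assert sorted(unordered) == sorted(_result)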

Code Example #25
def extract_nc(path, coord_path, variable_name, precision=3, num_pool=4):
    """extract variable(given region by coord) from .nc file
    input:
        path: path of the source nc file
        coord_path: path of the coord extracted by fishnet: OID_, lon, lat
        variable_name: name of the variable need to read
        precision: the minimum precision of lat/lon, to match the lat/lon of source nc file
        num_pool: the number of processes

    output:
        {variable_name}.txt [i, j]: i(file number) j(grid point number)
        lat_index.txt/lon_index.txt
        coord.txt
    """
    print(f"variable:{variable_name}")
    coord = pd.read_csv(coord_path, sep=",")  # read coord(extract by fishnet)
    print(f"grid point number:{len(coord)}")
    coord = coord.round(
        precision)  # coord precision correlating with .nc file lat/lon
    result = [path + "/" + d for d in os.listdir(path) if d[-4:] == ".nc4"]
    print(f"file number:{len(result)}")
    variable = np.zeros(
        (len(result),
         len(coord) + 1))  # save the path correlated with read order

    # calculate the index of lat/lon in coord from source nc file
    f1 = Dataset(result[0], 'r')
    Dataset.set_auto_mask(f1, False)
    lat_index = []
    lon_index = []
    lat = f1.variables["lat"][:]
    lon = f1.variables["lon"][:]
    for j in range(len(coord)):
        lat_index.append(np.where(lat == coord["lat"][j])[0][0])
        lon_index.append(np.where(lon == coord["lon"][j])[0][0])
    f1.close()

    # read variable based on the lat_index/lon_index, based on multiprocessing
    def read(i):
        """read variable from nc file(i), used in pool"""
        vb = []
        f = Dataset(result[i], 'r')
        vb.append(float(re.search(r"\d{6}", result[i])[0]))
        # re: the number depend on the nc file name(daily=8, month=6)
        Dataset.set_auto_mask(f, False)
        for j in range(len(coord)):
            vb.append(f.variables[variable_name][0, lat_index[j],
                                                 lon_index[j]])
            # require: nc file only have three dimension
            # f.variables['Rainf_f_tavg'][0, lat_index_lp, lon_index_lp]is a mistake, we only need the file
            # that lat/lon corssed (1057) rather than meshgrid(lat, lon) (1057*1057)
        print(f"complete read file:{i}")
        return vb

    po = Pool(num_pool)  # pool
    res_po = [po.amap(read, (i, ))
              for i in range(len(result))]  # the results of every process
    po.close()
    po.join()
    for i in range(len(result)):
        variable[i, :] = res_po[i].get()[0]  # get varibale from result
    # sort by time
    variable = variable[variable[:, 0].argsort()]
    # save
    np.savetxt(f'{variable_name}.txt', variable, delimiter=' ')
    np.savetxt('lat_index.txt', lat_index, delimiter=' ')
    np.savetxt('lon_index.txt', lon_index, delimiter=' ')
    coord.to_csv("coord.txt")
Code Example #26
        silh_diff = sil_df['cluster_silhouette'].max() - sil_threshold
        # Define n_clus to have the minimum number of clusters when silh scores are too similar
        best_silhs = sil_df.loc[sil_df['cluster_silhouette'] > silh_diff]
        best_silh, n_clus = best_silhs.loc[best_silhs['num_clusters'].idxmin()]
    else:
        best_silh, n_clus = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]
    n_clus = int(n_clus)
    clus.spectral_clustering(n_clusters=n_clus, n_jobs=4, random_state=1234)
    cluster_information = {
        signatures_idx: clus.cluster_percentage_color(),
        'best_silh': best_silh,
        'labels': clus.labels
    }
    return cluster_information


drivers = list(all_signatures.keys())
drivers.remove('species_combinations')

drivers_to_analyze = []
for dr in drivers:
    if len(all_signatures['species_combinations'][dr]['products'][1]) > 1:
        drivers_to_analyze.append(dr)

p = Pool(cpus)
res = p.amap(cluster_percentage_color_spectral, drivers_to_analyze)
results = res.get()

with open('cluster_info_spectral_sampled_kd_consumption.pickle', 'wb') as fp:
    pickle.dump(results, fp)
Code Example #27
class DiffEvolution:
    def __init__(self,
                 func,
                 bounds,
                 niter=100,
                 population=100,
                 ftol=0.001,
                 workers=-1,
                 vec_dump=10,
                 restart=False,
                 mut_fac=0.3,
                 cross_prob=0.7):
        """
        Initialise a differential evolution optimisation  instance.

        Args:
            func (callable): Function to be minimised. f(x, *args) - x is the argument to be minimised, args is a tuple of any additional  fixed parameters to specify the function
            bounds (list(Double)): list of pairs of (min,max) bounds for x
            niter (Int): number of iterations for optimiser
            population (Int): number of members in population
            ftol (Double) : convergence criteria for function
            workers (Int): number of multiprocessing workers to use. -1 sets workers to mp.cpu_count()
            vec_dump (Int): write a restart file every vec_dump steps
            restart (Bool): restart the run from a restart file
            mut_fac (Double): mutation factor of diff evolution
            cross_prob (Double): cross over probability for mutant to generate trial

        """

        self.function = func
        self.bounds = bounds
        self.niter = niter
        self.population = population
        self.ftol = ftol
        self.vec_dump = vec_dump
        self.restart = restart
        self.mut_fac = mut_fac
        self.cross_prob = cross_prob
        self.particles = []
        self.best_global_vec = None
        self.best_global_fit = math.inf
        self.dim = len(self.bounds)
        self.vector_restart = VectorInOut(bounds, "sos.rst")

        if workers == -1:
            self.pool = Pool(mp.cpu_count())
        else:
            self.pool = Pool(workers)

    def part_init(self, vector):
        """
        Wrapper for particle initialisation for multiprocess
        
        Args:
        
        vector (numpy array)

        Returns: 
        
        vector (numpy array)
        result of function(vector)
        """
        return vector, self.function(self.vector_to_pot(vector), self.args)

    def initialise_particles(self):
        """
        Initialises the population: sets particle vectors using latin hypercube, and sets global bests

        Args:
            None
            
        """

        if self.restart:
            vec, fit = self.vector_restart.read_vectors()
            for i, vec in enumerate(vec):
                self.particles.append(Particle(np.asarray(vec), fit[i], i))

            self.set_global_best()
        else:
            vectors = lhs(len(self.bounds), self.population)
            res = self.pool.amap(self.part_init, vectors)
            for i, val in enumerate(res.get()):
                self.particles.append(Particle(val[0], val[1], i))
            self.best_global_fit = copy.deepcopy(self.particles[0].return_fit)
            self.best_global_vec = copy.deepcopy(self.particles[0].return_vec)

    def vector_to_pot(self, vector):
        """
        Converts particle vector to actual x values
        
        Args:
            vector (numpy array): vector position in parameter space

        """

        return ((self.bounds[:, 1] - self.bounds[:, 0]) *
                vector) + self.bounds[:, 0]

    def set_global_best(self):
        """
        Sets current global best fit for function, and corresponding vector
    
        Args:
            None
        """

        for particle in self.particles:
            if particle.fit < self.best_global_fit:
                self.best_global_fit = copy.deepcopy(particle.return_fit)
                self.best_global_vec = copy.deepcopy(particle.return_vec)
        output("Current best fit:" + str(self.best_global_fit) + "\n")

    def evolve(self, part):

        np.random.seed()
        ind = np.random.choice(
            [i for i in range(self.population) if i != part.index],
            3,
            replace=False)
        a = self.particles[ind[0]]
        b = self.particles[ind[1]]
        c = self.particles[ind[2]]

        mutant = a.vector + self.mut_fac * (b.vector - c.vector)
        mutant[mutant > 1.0] = np.random.uniform()
        mutant[mutant < 0.0] = np.random.uniform()
        cross_points = np.random.rand(self.dim) < self.cross_prob

        if not np.any(cross_points):
            cross_points[np.random.randint(0, self.dim)] = True
        trial = np.where(cross_points, mutant, part.vector)
        fit = self.function(self.vector_to_pot(trial), self.args)

        if fit < part.fit:
            return trial, fit
        else:
            return part.vector, part.fit

    def run_evolution(self):

        res = self.pool.amap(self.evolve, self.particles)
        for i, val in enumerate(res.get()):
            self.particles[i].vector, self.particles[i].fit = val

    def optimise(self, args):
        """                                                         
        Optimise the function by running the main optimisation loop.
                                                                    
        Args:                                                       
            function (Function): function to optimise               
            args (Optional): any further args required by function  
                                                                    
        """

        self.args = args
        self.initialise_particles()
        self.set_global_best()

        for step in range(self.niter):
            output("Doing step: " + str(step) + "\n")
            self.run_evolution()
            self.set_global_best()
            if self.best_global_fit < self.ftol:
                break
            if step % self.vec_dump == 0:
                output("Going to dump particle vectors\n")
                self.vector_restart.write_vectors(self.particles)
Code Example #28
import numpy as np
from pysb.simulator import ScipyOdeSimulator
from pathos.multiprocessing import ProcessingPool as Pool
from earm2_flat import model
import pickle

tspan = np.linspace(0, 20000, 100)


def run_simulation(param_values):
    sim = ScipyOdeSimulator(model,
                            tspan=tspan).run(param_values=param_values).species
    return sim


all_parameters = np.load('sampled_kd_ic_parameter.npy')
cpu_cores = 31
p = Pool(cpu_cores)
res = p.amap(run_simulation, all_parameters)
sims = res.get()

with open('sims_sampled_kd_ic_list.pickle', 'wb') as fp:
    pickle.dump(sims, fp)
Code Example #29
        fuzzy = False
else:
    regex_search = MicrosatelliteRegex(fuzzy_substitution=0,
                                       min_seq_length=min_seq_len)
    fuzzy = False

regex_pregen = regex_search.create_regex(preloaded=True)
logging.info("Starting searches")

if __name__ == '__main__':
    result_1 = []
    output_1 = []

    if args.multithread is True:
        p = Pool(nodes=args.cpus[0])
        res = p.amap(regex_worker_multithread, fastq_file.sequences)
        count = 0
        while not res.ready():
            count += 2
            print("\rWaiting. Timer: %d" % count, end='')
            time.sleep(2)
        result_1 = res.get()
        print("\nDone searching")

    elif args.multithread is False:
        num_seq = 0
        for seq in fastq_file.sequences:
            num_seq += 1
            rexex = regex_worker(seq.seq)
            if rexex:
                result_1.append(rexex)
Code Example #30
        sim1 = ScipyOdeSimulator(model, tspan=tspan,
                                 param_values=pars_label1).run()
        sim1.save(
            'sims_baxkd80_sensitivities_sampled_kd/earm_scipyode_sims_good{0}.h5'
            .format(label))

    if label == 6:
        """
        Cluster 6:
        Dominant reactions:
        BidM_BaxC, BidM_Bcl2M
        """
        pars_label1 = pars[np.where(clus_sp37_labels == label)]
        pars_label1[:, 63] = pars_ref1[63] * 0.2  # 80% Knock down of bax
        # pars_label1[:, 58] = pars_ref1[58] * 0.2  # 20% Knock down of Bcl2

        # pars_label1[:, 64] = pars_label1[64] * 0.8  # 80% Knock down of bak
        # pars_label1[:, 57] = pars_label1[57] * 0.8  # 80% Knock down of mcl1
        sim1 = ScipyOdeSimulator(model, tspan=tspan,
                                 param_values=pars_label1).run()
        sim1.save(
            'sims_baxkd80_sensitivities_sampled_kd/earm_scipyode_sims_good{0}.h5'
            .format(label))

    return


p = Pool(25)
res = p.amap(sims_kd, unique_labels)
res.get()
Code Example #31
File: test_mp.py Project: WarrenWeckesser/pathos
# instantiate and configure the worker pool
from pathos.multiprocessing import ProcessingPool
pool = ProcessingPool(nodes=4)

_result = list(map(pow, [1,2,3,4], [5,6,7,8]))  # serial reference result

# do a blocking map on the chosen function
result = pool.map(pow, [1,2,3,4], [5,6,7,8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1,2,3,4], [5,6,7,8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1,2,3,4], [5,6,7,8])
result = result_queue.get()
assert result == _result