    def __init__(self, model, step_finish, args=None, split=0, buffer=6,
                 recombine=None, recombine_args=None, verbose=False,
                 boundary_pass=1):

        self.model = self.grid_adjust(model)
        # user-supplied callback that finishes each step
        self.step_finish = step_finish
        self.step_args = args

        #Get the number of CPUs unless user specified
        if split == 0:
            self.ncpus = cpu_count()
        else:
            self.ncpus = split

        # create the process pool and the inter-process communication pipes
        self.pool = ProcessPool(nodes=self.ncpus)
        self.pipes = self.pipe_setup(self.ncpus)

        self.buffer = buffer
        self.multi_models = collections.OrderedDict()
        #dictionary to track when all steps on each processor complete
        self.sync_status = collections.OrderedDict()

        # user-controlled number of boundary deconfliction passes
        self.boundary_pass = boundary_pass

        # use the supplied recombination function, or fall back to the default
        if recombine is None:
            self.recombine = self.recombine_default
        else:
            self.recombine = recombine
        self.recombine_args = recombine_args

        self.verbose = verbose
Example #2
def load_data_from_files_raw(
    data_files: Iterable[Path],
    # NOTE: this callback signature is loose; a dedicated interface type would be cleaner
    parse_callback: Callable[..., Tuple[str, int, Iterable[T_Single]]],  # type: ignore
    parallelize: bool,
    *args,
) -> Dict[str, Tuple[int, Iterable[T_Single]]]:
    tasks_as_args = [[data_file, *args] for data_file in data_files]

    if parallelize:
        pool = ProcessPool()

        # Wrap the callback so each task tuple is unpacked into positional arguments.
        # pathos is used instead of the stdlib multiprocessing because it serializes
        # with dill and can therefore handle closures and class methods.
        def cb(x):
            return parse_callback(*x)

        per_file_results = list(pool.map(cb, tasks_as_args))
    else:
        per_file_results = [parse_callback(*task_args) for task_args in tasks_as_args]  # type: ignore

    lang_samples_iter: Dict[str, Tuple[int, List[Iterable[T_Single]]]] = {}
    for (lang, lg, samples_iter) in per_file_results:
        if lang not in lang_samples_iter:
            lang_samples_iter[lang] = (0, [])
        (lg0, iters) = lang_samples_iter[lang]
        iters.append(samples_iter)
        lang_samples_iter[lang] = (lg0 + lg, iters)

    lang_samples: Dict[str, Tuple[int, Iterable[T_Single]]] = {}
    for (lang, (lg, iters)) in lang_samples_iter.items():
        lang_samples[lang] = (lg, itertools.chain(*iters))

    return lang_samples
Example #3
    def download(self, index_path, txt_dir):
        # Save to txt dir
        self.txt_dir = txt_dir
        if not os.path.exists(self.txt_dir):
            os.makedirs(self.txt_dir)

        # Count Total Urls to Process
        with open(index_path, 'r') as fin:
            num_urls = sum(1 for line in fin)

        def iter_path_generator(index_path):

            with open(index_path, 'r') as fin:
                reader = csv.reader(fin,
                                    delimiter=',',
                                    quotechar='\"',
                                    quoting=csv.QUOTE_ALL)
                for url_idx, row in enumerate(reader, 1):
                    form_type, company_name, cik, date_filed, filename = row
                    url = os.path.join(SEC_GOV_URL,
                                       filename).replace("\\", "/")
                    yield (url_idx, url)

        def download_job(obj):
            url_idx, url = obj

            fname = '_'.join(url.split('/')[-2:])

            fname, ext = os.path.splitext(fname)
            htmlname = fname + '.html'

            text_path = os.path.join(self.txt_dir, fname + '.txt')

            if os.path.exists(text_path):
                print("Already exists, skipping {}...".format(url))
                sys.stdout.write("\033[K")
            else:
                print("Total: {}, Downloading & Parsing: {}...".format(
                    num_urls, url_idx))
                sys.stdout.write("\033[K")

                r = requests.get(url)
                try:
                    # Parse html with Beautiful Soup
                    soup = BeautifulSoup(r.content, "html.parser")
                    text = soup.get_text("\n")

                    # Process Text
                    text = self._process_text(text)
                    text_path = os.path.join(self.txt_dir, fname + '.txt')

                    # Write to file
                    with codecs.open(text_path, 'w', encoding='utf-8') as fout:
                        fout.write(text)
                except BaseException as e:
                    print("{} parsing failed: {}".format(url, e))

        ncpus = cpu_count() if cpu_count() <= 8 else 8
        pool = ProcessPool(ncpus)
        pool.map(download_job, iter_path_generator(index_path))
    def run(self):
        """
        Run experiment
        """
        num_drivers = np.arange(1000, 6500, 500)
        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for drivers in num_drivers:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters'][
                'city_states_filename'] = "city_states.dill"
            self.config['RL_parameters']['num_drivers'] = drivers
            self.config['RL_parameters']['num_strategic_drivers'] = drivers
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_02")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_02")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
Example #5
def main(args):
    setup = experiment_setups.parse(args.setup)
    dirname = fileutil.run_dir(args.dest_dir, setup.name,
                               args.max_quantifier_length, args.model_size,
                               args.name)
    file_util = FileUtil(dirname)

    languages = language_loader.load_languages(file_util)
    if args.comp_strat == 'wordcount':
        complexity_measurer = WordCountComplexityMeasurer(args.max_words)
    elif args.comp_strat == 'wordcomplexity':
        complexity_measurer = SumComplexityMeasurer(args.max_words, 1)
    elif args.comp_strat == 'special':
        complexity_measurer = SpecialComplexityMeasurer(args.max_words)
    else:
        raise ValueError('{0} is not a valid complexity strategy.'.format(
            args.comp_strat))

    with ProcessPool(nodes=args.processes) as pool:
        complexity = pool.map(complexity_measurer, languages)

    file_util.dump_dill(complexity,
                        'complexity_{0}.dill'.format(args.comp_strat))

    print("measure_complexity.py finished.")
    def run(self):
        """
        Run experiment
        """
        num_drivers = np.arange(1000, 6500, 500)
        thresholds = np.arange(5, 55, 5)
        thresholds = np.insert(thresholds, 0, 2)
        combinations = list(itertools.product(num_drivers, thresholds))

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for comb in combinations:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters']['num_drivers'] = comb[0]
            self.config['RL_parameters']['imbalance_threshold'] = comb[1]
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_04")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_04")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
Example #7
    def gen_operator_data(self, space, Nx, M, num, representation):
        print("Generating operator data...", flush=True)
        features = space.random(num)
        # Generate outputs
        x = np.linspace(0, self.T, num=self.Nx)[:, None]
        sensor_values = self.random_process(space.eval_u(features, x,
                                                         self.M))  # exp(b)
        p = ProcessPool(nodes=config.processes)
        s_values = np.array(p.map(self.eval_s, sensor_values))

        # Generate inputs
        sensors = np.linspace(0, self.T, num=Nx)[:, None]
        if representation == "samples":
            sensor_values = self.random_process(
                space.eval_u(features, sensors, M))
        elif representation == "KL":
            sensor_values = space.eval_KL_bases(features, sensors, M)
            # sensor_values = self.random_process(sensor_values)
        res = [
            make_triple(sensor_values[i], x, s_values[i], self.npoints_output)
            for i in range(num)
        ]
        res = np.vstack(res)
        m = sensor_values.shape[1]
        return [res[:, :m], res[:, m:-1]], res[:, -1:]
Example #8
    def update_qfunction(self):
        if self.TWIN_Q:
            self.i = (self.i + 1) % 2

        if self.theta_q is None:  # generate critic network if none exist
            n = len(self.state_action_basis(self.state, self.action))
            if self.TWIN_Q:
                m = 2  # generate 2 q networks
            else:
                m = 1
            self.theta_q = np.random.normal(0, 0.3, (n, m))
        # recorded for analysis
        self.q_predicted = self.theta_q[:, self.i] @ self.xu_k
        self.q_observed = self.r + self.BETA * self.theta_q[:, self.i] @ self.xu_k1

        if len(self.memory) > self.BATCH_SIZE:
            batch = random.sample(self.memory, self.BATCH_SIZE)

            pool = ProcessPool(nodes=self.config['simulation']['n_nodes'])
            batch_y = np.array(pool.map(self.process_exp, batch))
            batch_phi = np.array([
                self.state_action_basis(exp['state'], exp['action'])
                for exp in batch
            ])

            clf = Ridge(alpha=0.01)
            clf.fit(batch_phi, batch_y)
            temp_theta = clf.coef_
            # soft update of the active critic's weights
            self.theta_q[:, self.i] = (self.ALPHA_q * temp_theta
                                       + (1 - self.ALPHA_q) * self.theta_q[:, self.i])
Example #9
def get_full_content(json_data, num_cores):

	def param_generator(json_data):
		for data in json_data:
			yield data['id'], data['url']

	def tag_and_write_job(param):
		num_x, news_url = param
		logging.info("Processing news #{}: {}".format(num_x, news_url))

		news_content = GetUrlContent(news_url)

		## store all news (might be used for word2vec) ##
		with open('OpinionAnalysis/data/news_corpus.txt', 'a') as fp:
			fp.write('*\n')
			fp.write(news_content)

		return {'id':num_x, 'content':news_content}

	pool = ProcessPool(num_cores)
	new_json_data = pool.map(tag_and_write_job, param_generator(json_data))

	df = pd.DataFrame(json_data)
	df_new = pd.DataFrame(new_json_data)
	ret_df = df.merge(df_new, left_on='id', right_on='id')
	ret_json = json.loads(ret_df.to_json(orient='records'))

	return ret_json
Example #10
def main():

    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size
    
    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name, max_quantifier_length, model_size))
    
    
    universe = setup.generate_models(model_size)
    
    folderName = "{0}/{1}_length={2}_size={3}".format(
        args.dest_dir, setup.name, max_quantifier_length, model_size)
    os.makedirs(folderName, exist_ok=True)
    
    processpool = ProcessPool(nodes=processes)
    expression_generator = ExpressionGenerator(setup, model_size, universe, processpool)
    (generated_expressions_dict, expressions_by_meaning) = \
          expression_generator.generate_all_expressions(max_quantifier_length)
    
    print("{0} expressions!".format(len(expressions_by_meaning[bool].values())))
    
    file_util.dump_dill(expressions_by_meaning[bool], 'generated_expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].values()), 'expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].keys()), 'meanings.dill')
    
    processpool.close()
    processpool.join()
    
    print('Expression generation finished.')
Example #11
def main(args):

    setup = experiment_setups.parse(args.setup)
    processes = setup.processes
    max_quantifier_length = setup.max_quantifier_length
    model_size = setup.model_size

    file_util = FileUtil(
        fileutil.base_dir(setup.dest_dir, setup.name, max_quantifier_length,
                          model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(setup.dest_dir,
                                                      setup.name,
                                                      max_quantifier_length,
                                                      model_size)

    processpool = ProcessPool(nodes=processes)

    expressions = file_util.load_dill('expressions.dill')

    complexities = processpool.map(
        lambda ex: setup.measure_expression_complexity(
            ex, max_quantifier_length), expressions)

    file_util.dump_dill(complexities, 'expression_complexities.dill')

    processpool.close()
    processpool.join()

    print('Complexity Measuring finished.')
Example #12
def main(args):
    setup = experiment_setups.parse(args.setup)
    dirname = fileutil.run_dir(args.dest_dir, setup.name,
                               args.max_quantifier_length, args.model_size,
                               args.name)
    file_util = FileUtil(dirname)

    languages = language_loader.load_languages(file_util)

    universe = generator.generate_simplified_models(args.model_size)

    if args.inf_strat == 'exact':
        informativeness_measurer = InformativenessMeasurer(len(universe))
    elif args.inf_strat == 'simmax':
        informativeness_measurer = SimMaxInformativenessMeasurer(universe)
    else:
        raise ValueError('{0} is not a valid informativeness strategy.'.format(
            args.inf_strat))

    with ProcessPool(nodes=args.processes) as pool:
        informativeness = pool.map(informativeness_measurer, languages)

    file_util.dump_dill(informativeness,
                        'informativeness_{0}.dill'.format(args.inf_strat))

    print("measure_informativeness.py finished.")
Example #13
def pathos_mp_batch_evaluator(
    func,
    arguments,
    n_cores=N_CORES,
    error_handling="continue",
    unpack_symbol=None,
):
    """Batch evaluator based on pathos.multiprocess.ProcessPool

    This uses a patched but older version of python multiprocessing that replaces
    pickling with dill and can thus handle decorated functions.

    Args:
        func (Callable): The function that is evaluated.
        arguments (Iterable): Arguments for the functions. Their interperation
            depends on the unpack argument.
        n_cores (int): Number of cores used to evaluate the function in parallel.
            Value below one are interpreted as one. If only one core is used, the
            batch evaluator disables everything that could cause problems, i.e. in that
            case func and arguments are never pickled and func is executed in the main
            process.
        error_handling (str): Can take the values "raise" (raise the error and stop all
            tasks as soon as one task fails) and "continue" (catch exceptions and set
            the output of failed tasks to the exception object without raising it.
            KeyboardInterrupt and SystemExit are always raised.
        unpack_symbol (str or None). Can be "**", "*" or None. If None, func just takes
            one argument. If "*", the elements of arguments are positional arguments for
            func. If "**", the elements of arguments are keyword arguments for func.


    Returns:
        list: The function evaluations.

    """
    if not pathos_is_available:
        raise NotImplementedError(
            "To use the pathos_mp_batch_evaluator, install pathos with "
            "conda install -c conda-forge pathos.")

    _check_inputs(func, arguments, n_cores, error_handling, unpack_symbol)
    n_cores = int(n_cores)

    reraise = error_handling == "raise"

    @unpack(symbol=unpack_symbol)
    @catch(default="__traceback__", reraise=reraise)
    def internal_func(*args, **kwargs):
        return func(*args, **kwargs)

    if n_cores <= 1:
        res = [internal_func(arg) for arg in arguments]
    else:
        p = ProcessPool(nodes=n_cores)
        try:
            res = p.map(internal_func, arguments)
        except Exception as e:
            p.terminate()
            raise e

    return res
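
A minimal usage sketch of the evaluator defined above, assuming it and its helper decorators are in scope; the square helper and its argument tuples are illustrative:

def square(x, offset=0):
    return x ** 2 + offset

# Each element of `arguments` is unpacked into positional arguments because
# unpack_symbol="*"; with error_handling="continue", a failing task would yield
# the exception object instead of aborting the whole batch.
results = pathos_mp_batch_evaluator(
    func=square,
    arguments=[(1,), (2, 10), (3,)],
    n_cores=2,
    error_handling="continue",
    unpack_symbol="*",
)
# results == [1, 14, 9]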
Example #14
def parcompute_example():
    dc = PMPExample()
    dc2 = PMPExample()
    dc3 = PMPExample()
    dc4 = PMPExample()

    n_datapoints = 100
    inp_data = range(n_datapoints)
    r1 = dc.threadcompute(inp_data)
    assert (len(dc.cache) == n_datapoints)

    r2 = dc2.processcompute(inp_data)
    assert (len(dc2.cache) == 0)
    assert (r1 == r2)

    r3 = ProcessPool(4).map(dc3.compute, inp_data)
    r4 = ThreadPool(4).map(dc4.compute, inp_data)
    assert (r4 == r3 == r2)
    assert (len(dc3.cache) == 0)
    assert (len(dc4.cache) == n_datapoints)

    log.info("Size of threadpooled class caches: {0}, {1}".format(
        len(dc.cache), len(dc4.cache)))
    log.info("Size of processpooled class caches: {0}, {1}".format(
        len(dc2.cache), len(dc3.cache)))
Example #15
    def count(self,
              name='e1',
              meta='count',
              nodes=None,
              debug=False,
              parallel=False):
        """
        count number of points in the neighborhood
        """
        self.estimates[name] = {}
        self.estimates[name]['vname'] = None
        self.estimates[name][meta] = meta

        if nodes is None:
            nodes = self.nodes

        def f(i):
            # update data selected around target point
            self.search.update([self.x0[i], self.y0[i], self.z0[i]])
            if debug:
                return (np.sum(self.search.test),
                        self.search.row_id[self.search.test])
            else:
                return np.sum(self.search.test), None

        # apply the estimator to each target
        if parallel:
            pool = ProcessPool()
            self.estimates[name]['estimate'] = np.array(pool.map(f, nodes))
        else:
            self.estimates[name]['estimate'] = np.array(list(map(f, nodes)))
    def run(self):
        """
        Run experiment
        """
        num_drivers = self.config['RL_parameters']['num_drivers']
        percent_strategic_drivers = np.arange(0, 1.1, 0.1)
        num_strategic_drivers = [int(x * num_drivers) for x in percent_strategic_drivers]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for drivers in num_strategic_drivers:
            self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters']['num_strategic_drivers'] = drivers
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_05")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_05")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
    def run(self):
        self.logger.info("Starting baselines")
        city_states = self.data_provider.read_city_states()
        baseline_list = self.config['baselines']['baseline_list']

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        for count in range(10):
            for name in baseline_list:
                configs.append({
                    'name': name,
                    'count': count,
                    'config': self.config,
                    'city_states': city_states
                })

        results = pool.amap(self.run_baseline, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        episode_rewards = []
        for result in results:
            episode_rewards += result

        self.data_exporter.export_baseline_data(episode_rewards)
        self.logger.info("Finished baselines")
    def land_routine(self):
        while self.took_off:
            pool = ProcessPool()
            r = pool.map(self.client_land, self.agent_names)
            rospy.loginfo('Landing responses:')
            rospy.loginfo(r)
            self.took_off = not all(r)
        return True
Example #19
def map_reduce_multicore(
        f: tp.Callable[..., ResultType],
        reduction: tp.Callable[[ResultType, ResultType], ResultType],
        initial_value: tp.Optional[ResultType] = None,
        args_list: tp.Optional[tp.Sequence[tp.Sequence]] = None,
        kwargs_list: tp.Optional[tp.Sequence[tp.Dict[str, tp.Any]]] = None,
        number_of_batches: tp.Optional[int] = None,
        multiprocessing_pool_type: MultiprocessingPoolType = MultiprocessingPoolType.default()) \
        -> ResultType:

    if number_of_batches is None:
        if args_list is not None:
            number_of_batches = len(args_list)
        elif kwargs_list is not None:
            number_of_batches = len(kwargs_list)
        else:
            raise ValueError('number_of_batches must be provided if '
                             'both args_list and kwargs_list are None')

    if args_list is None:
        args_list = number_of_batches * [list()]
    if kwargs_list is None:
        kwargs_list = number_of_batches * [dict()]

    result = initial_value
    if multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
        from concurrent.futures import as_completed
        from loky import get_reusable_executor

        executor = \
            get_reusable_executor(timeout=None,
                                  context='loky')

        futures = [
            executor.submit(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.result()
    elif multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
        from pathos.pools import ProcessPool
        pool = ProcessPool()
        futures = [
            pool.apipe(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.get()
    else:
        raise ValueError(
            f'Multiprocessing pool type {multiprocessing_pool_type} not supported'
        )

    for future in futures:
        result = reduce_with_none(result, result_from_future(future),
                                  reduction)

    return result
Example #20
    def __init__(self,
                 func,
                 schema,
                 ds,
                 scheduler: str = "single",
                 workers: int = 1,
                 **kwargs):
        """| Transform applies a user defined function to each sample in single threaded manner.

        Parameters
        ----------
        func: function
            user defined function func(x, **kwargs)
        schema: dict of dtypes
            the structure of the final dataset that will be created
        ds: Iterative
            input dataset or a list that can be iterated
        scheduler: str
            choice between "single", "threaded", "processed"
        workers: int
            how many threads or processes to use
        **kwargs:
            additional arguments that will be passed to func as static argument for all samples
        """
        self._func = func
        self.schema = schema
        self._ds = ds
        self.kwargs = kwargs
        self.workers = workers

        if isinstance(self._ds, Transform):
            self.base_ds = self._ds.base_ds
            self._func = self._ds._func[:]
            self._func.append(func)
            self.kwargs = self._ds.kwargs[:]
            self.kwargs.append(kwargs)
        else:
            self.base_ds = ds
            self._func = [func]
            self.kwargs = [kwargs]

        if scheduler == "threaded" or (scheduler == "single" and workers > 1):
            self.map = ThreadPool(nodes=workers).map
        elif scheduler == "processed":
            self.map = ProcessPool(nodes=workers).map
        elif scheduler == "single":
            self.map = map
        elif scheduler == "ray":
            try:
                from ray.util.multiprocessing import Pool as RayPool
            except Exception:
                pass
            self.map = RayPool().map
        else:
            raise Exception(
                f"Scheduler {scheduler} not understood, please use 'single', 'threaded', 'processed'"
            )
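
The scheduler handling above reduces to picking a map implementation; a standalone sketch of that pattern using pathos pools (the pick_map helper is illustrative, not part of the library):

from pathos.pools import ProcessPool, ThreadPool


def pick_map(scheduler="single", workers=1):
    # Return a map callable for the requested scheduler.
    if scheduler == "threaded" or (scheduler == "single" and workers > 1):
        return ThreadPool(nodes=workers).map
    if scheduler == "processed":
        return ProcessPool(nodes=workers).map
    if scheduler == "single":
        return map
    raise ValueError("Scheduler {} not understood".format(scheduler))


# e.g. run a per-sample transform with 4 worker processes
mapper = pick_map("processed", workers=4)
doubled = list(mapper(lambda x: 2 * x, range(8)))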
Example #21
    def pool(self):
        """
        Returns: the computing pool used to run the queries

        """
        if self.servers is None:
            return ProcessPool(nodes=self.workers)
        else:
            return ParallelPool(nodes=self.workers, servers=self.servers)
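
A sketch of the same local-vs-distributed selection wrapped in a small class (QueryRunner and its attributes are illustrative); ParallelPool is pathos' ppft-backed pool, which accepts a servers argument as in the snippet above:

from pathos.pools import ProcessPool, ParallelPool


class QueryRunner:
    def __init__(self, workers=4, servers=None):
        self.workers = workers
        self.servers = servers  # e.g. a tuple of "host:port" strings for remote workers

    @property
    def pool(self):
        # Local process pool by default; a distributed pool when servers are given.
        if self.servers is None:
            return ProcessPool(nodes=self.workers)
        return ParallelPool(nodes=self.workers, servers=self.servers)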
Example #22
def mlp():
    if not os.path.exists(tar_dir):
        os.makedirs(tar_dir)

    iterator = glob(os.path.join(src_dir, '*.txt'))

    ncpus = cpu_count() if cpu_count() <= 8 else 8
    pool = ProcessPool(ncpus)
    pool.map(preprocess_job, iterator)
    def run(self):
        """
        Run experiment
        """
        days = [
            'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
            'Thursday_00_', 'Friday_00_', 'Saturday_00_', 'Sunday_01_',
            'Monday_01_', 'Tuesday_01_', 'Wednesday_01_', 'Thursday_01_',
            'Friday_01_', 'Saturday_01_', 'Sunday_02_', 'Monday_02_',
            'Tuesday_02_', 'Wednesday_02_', 'Thursday_02_', 'Friday_02_',
            'Saturday_02_', 'Sunday_03_', 'Monday_03_', 'Tuesday_03_',
            'Wednesday_03_', 'Thursday_03_', 'Friday_03_', 'Saturday_03_',
            'Sunday_04_', 'Monday_04_', 'Tuesday_04_', 'Wednesday_04_',
            'Thursday_04_', 'Friday_04_', 'Saturday_04_'
        ]

        num_drivers = [4000, 5000, 6000, 7000, 8000, 9000, 10000]

        imbalance_thresholds = [2]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0

        for d in num_drivers:
            for threshold in imbalance_thresholds:
                for day in days:
                    self.config['RL_parameters']['num_drivers'] = d
                    self.config['RL_parameters']['num_strategic_drivers'] = d

                    self.config['RL_parameters'][
                        'imbalance_threshold'] = threshold
                    self.config['RL_parameters'][
                        'experiment'] = self.expt_name + "_" + str(count)
                    if os.path.isfile(self.config['app']['DATA_DIR'] +
                                      'city_states/' + day +
                                      'city_states.dill'):
                        self.config['RL_parameters'][
                            'city_states_filename'] = day + 'city_states.dill'
                        self.config['RL_parameters']['best_model_filename'] = (
                            day + str(d) + '_' + str(threshold) +
                            '_model.dill')
                        configs.append(deepcopy(self.config))
                        count += 1

        self.logger.info("Starting expt_07")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_07")
Example #24
def opt_ind_params(perf_f, res, games, gids, role, pid, init_params, bounds):
    def min_f(params_test):
        return -perf_f(params_test, res, games, gids, role, pid)

    pool = ProcessPool(nodes=mp.cpu_count())
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.map)
    # clean up the worker pool before returning (previously unreachable after the return)
    pool.close()
    pool.join()
    pool.clear()
    return opt
Example #25
    def eval_s_space(self, space, features, x):
        """For a list of functions in `space` represented by `features`
        and a list `x`, compute the corresponding list of outputs.
        """
        def f(feature, xi):
            return self.eval_s(lambda t: space.eval_u_one(feature, t), xi[0])

        p = ProcessPool(nodes=config.processes)
        res = p.map(f, features, x)
        return np.array(list(res))
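
pathos' ProcessPool.map, like the built-in map, accepts several iterables that are consumed in lockstep, which is the pattern used above; a toy sketch (the scale helper is illustrative):

from pathos.pools import ProcessPool

def scale(feature, x):
    return feature * x

p = ProcessPool(nodes=2)
out = p.map(scale, [1.0, 2.0, 3.0], [10, 20, 30])  # -> [10.0, 40.0, 90.0]
p.close()
p.join()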
Example #26
    def extract(self):
        def text_gen(txt_dir):
            # Yield the names of the .txt files in txt_dir
            for fname in os.listdir(txt_dir):
                if not fname.endswith('.txt'):
                    continue
                yield fname

        def parsing_job(fname):
            print("Parsing: {}".format(fname))
            # Read text
            filepath = os.path.join(self.txt_dir, fname)
            with codecs.open(filepath, 'rb', encoding='utf-8') as fin:
                text = fin.read()

            name, ext = os.path.splitext(fname)
            # Parse MDA part

            msg = ""
            mda, end = self.parse_mda(text)
            # Parse second time if first parse results in index
            if mda and len(mda.encode('utf-8')) < 1000:
                mda, _ = self.parse_mda(text, start=end)

            if mda:  # Has value
                msg = "SUCCESS"
                mda_path = os.path.join(self.mda_dir, name + '.mda')
                with codecs.open(mda_path, 'w', encoding='utf-8') as fout:
                    fout.write(mda)
            else:
                msg = "MDA NOT FOUND"
            print("{},{}".format(name, msg))
            return name + '.txt', msg

        ncpus = cpu_count() if cpu_count() <= 8 else 8
        pool = ProcessPool(ncpus)

        _start = time.time()
        parsing_failed = pool.map(parsing_job, text_gen(self.txt_dir))
        _end = time.time()

        print("MDA parsing time taken: {} seconds.".format(_end - _start))

        # Write failed parsing list
        count = 0
        parsing_log = 'parsing.log'
        with open(parsing_log, 'w') as fout:
            print("Writing parsing results to {}".format(parsing_log))
            for name, msg in parsing_failed:
                fout.write('{},{}\n'.format(name, msg))
                if msg != "SUCCESS":
                    count = count + 1

        print("Number of failed text:{}".format(count))
def runGOanalysis(clusters, processes=10):
    pool = ProcessPool(nodes=processes)
    # _runGOanalysis is expected to return one DataFrame per cluster
    newDf = pool.map(_runGOanalysis, clusters)
    pool.close()
    pool.join()
    df = pd.concat(newDf, axis=0)
    geneIndex = pd.read_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'), index_col=0)
    geneIndex = pd.concat([geneIndex, newEntrez], axis=0)
    geneIndex.to_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'))
    return(geneIndex)
Example #28
def reachability(model,
                 from_state,
                 goal,
                 max_length=2000,
                 on_start=None,
                 on_reach=None,
                 max_repeat=10000,
                 n_workers=1):
    if isinstance(model, pypint.Model):
        model = pypint_to_model(model)
    if isinstance(goal, list) or isinstance(goal, dict):
        goal = Goal(goal)
    if isinstance(from_state, list):
        if from_state:
            if isinstance(from_state[0], str):
                from_state = dict([(e, 1) for e in from_state])
            elif isinstance(from_state[0], tuple):
                from_state = dict(from_state)
    from_state = complete_state(from_state, model)
    trace = Trace(from_state)
    if on_start is not None:
        next_subgoal = goal.subgoals[0]
        on_start(model, trace, next_subgoal)
    if n_workers == 1:
        for n_repeat in range(max_repeat):
            reached, trace = _reach(copy.copy(model), from_state, goal,
                                    max_length, on_start, on_reach)
            if reached is True:
                return reached, trace
    else:
        pool = ProcessPool(n_workers)
        processes = set([])
        n_repeat = 0
        while n_repeat < max_repeat and n_repeat < n_workers:
            processes.add(
                pool.apipe(_reach, copy.copy(model), from_state, goal,
                           max_length, on_start, on_reach))
            n_repeat += 1
        reached = pypint.Inconc
        while reached is not True and n_repeat < max_repeat:
            for process in processes:
                if process.ready():
                    reached, trace = process.get()
                    processes.remove(process)
                    if reached is True:
                        return reached, trace
                    else:
                        processes.add(
                            pool.apipe(_reach, copy.copy(model), from_state,
                                       goal, max_length, on_start, on_reach))
                        n_repeat += 1
                        break
    return reached, trace
Example #29
def multi_process(data_path, time_list):
    for time in time_list[:]:
        # print(time)
        base_path = arrow.get(time['ini']).format('YYYYMMDDHH')
        # --预报数据处理
        gefs_fcst = GEFSFcst(data_path['gefs_fcst'], time, base_path)
        p = ProcessPool(7)
        for n in range(21):
            # gefs_fcst.download(n)
            p.apipe(download, gefs_fcst, n)
        p.close()
        p.join()
        p.clear()
Example #30
    def eval_KL_bases(self, ls, sensors, M):
        def helper(l):
            grf = GRF_KL(
                self.T,
                kernel=self.kernel,
                length_scale=l[0],
                num_eig=M,
                N=self.N,
                interp=self.interp,
            )
            return np.ravel(grf.bases(sensors))

        p = ProcessPool(nodes=config.processes)
        return np.vstack(p.map(helper, ls))