Example #1
def runTrainTopTestAll(aType):
    logging.info('Running ' + aType)

    if aType == 'ALL':
        aND = np.asarray(allTop)
    elif aType == 'INTER':
        aND = np.asarray(interTop)
    elif aType == 'LOCUS':
        aND = np.asarray(locusTop)
    elif aType == 'BLIA':
        aND = np.asarray(bliaTop)
    elif aType == 'BugLocator':
        aND = np.asarray(bugLTop)
    elif aType == 'BRTracer':
        aND = np.asarray(brtTop)
    elif aType == 'AMALGAM':
        aND = np.asarray(amalTop)
    elif aType == 'BLUiR':
        aND = np.asarray(bluirTop)

    elif aType == 'NALL':
        aND = np.asarray(allNTop)


    from sklearn.model_selection import KFold
    if aType == 'AMALGAM':
        logging.info(len(aND))
        kf = KFold(n_splits=len(aND), shuffle=True)

    else:
        kf = KFold(n_splits=10, shuffle=True)  # Define the split - into 10 folds
    if aType != 'BLUiR':
        iTer = 0
        tuples = []
        for train_index, test_index in kf.split(aND):
            X_train, X_test = aND[train_index], aND[test_index]
            aTuple = X_train, X_test, aType, iTer, simiFiles, '_TESTALL'
            tuples.append(aTuple)
            iTer += 1

        pool = Pool(core, maxtasksperchild=1)

        pool.map_async(corePredict, tuples, chunksize=1)

        pool.close()
        pool.join()
    else:
        aTuple = aND, None, aType, 0, simiFiles, '_TESTALL'
        corePredict(aTuple, True)
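A minimal, self-contained sketch of the same fan-out pattern: each KFold split becomes one argument tuple handed to Pool.map_async. Here fold_worker is a stand-in for corePredict and the data array is synthetic.

import numpy as np
from multiprocessing import Pool
from sklearn.model_selection import KFold

def fold_worker(args):
    # Placeholder for the per-fold training/prediction work done by corePredict.
    train, test, fold_id = args
    return fold_id, len(train), len(test)

if __name__ == '__main__':
    data = np.arange(100)
    kf = KFold(n_splits=10, shuffle=True)
    tuples = [(data[tr], data[te], i)
              for i, (tr, te) in enumerate(kf.split(data))]
    with Pool(4, maxtasksperchild=1) as pool:
        results = pool.map_async(fold_worker, tuples, chunksize=1).get()
    print(results)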
Example #2
def execute_all(runs, mode="a"):
    pool = Pool(processes=multiprocessing.cpu_count())
    result = pool.map_async(execute, runs, chunksize=1)

    start_time = time.time()
    while not result.ready():
        completed = len(runs) - result._number_left
        if completed > 0:
            rate = 60.0 * completed / (time.time() - start_time)
            percent = 100.0 * completed / len(runs)
            print(
                "%04d/%04d %4.1f%% %s %4d/min  ETA: %3.1f\r"
                % (
                    completed,
                    len(runs),
                    percent,
                    progressbar(percent / 100),
                    rate,
                    1.0 / rate * result._number_left,
                ),
                end=" ",
            )
        sys.stdout.flush()
        time.sleep(0.2)
    print("\n")
    pool.close()
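Example #2 reads the private AsyncResult._number_left attribute to report progress. A sketch of the same progress loop using only public API, with a stand-in execute worker: imap_unordered yields results as they finish, so completions can be counted directly.

import time
from multiprocessing import Pool

def execute(run):
    time.sleep(0.1)  # stand-in for the real work
    return run

if __name__ == '__main__':
    runs = list(range(50))
    start_time = time.time()
    with Pool() as pool:
        completed = 0
        for _ in pool.imap_unordered(execute, runs, chunksize=1):
            completed += 1
            rate = 60.0 * completed / (time.time() - start_time)
            percent = 100.0 * completed / len(runs)
            print("%04d/%04d %4.1f%% %4d/min\r" % (completed, len(runs), percent, rate), end=" ")
    print()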
Example #3
def parallel_variability_analysis(tmodel,
                                  kind='reactions',
                                  proc_num=BEST_THREAD_RATIO):
    """
    WIP.

    :param tmodel:
    :param kind:
    :param proc_num:
    :return:
    """

    raise (NotImplementedError)

    objective = tmodel.objective

    if kind == Reaction or kind.lower() in ['reaction', 'reactions']:
        these_vars = tmodel.reactions
    else:
        these_vars = tmodel.get_variables_of_type(kind)

    func = partial(_variability_analysis_element, tmodel)

    pool = Pool(processes=proc_num)
    async_result = pool.map_async(func, these_vars)
    pool.close()
    pool.join()

    # aggregated_result = pd.DataFrame(async_result.get(),
    #                                  columns = ['minimize','maximize'])

    tmodel.objective = objective
    return async_result
Example #4
def run():
    nkeys = int(input('Enter number of keys: '))
    keys = input('Enter the keys: ').strip().split()
    assert len(keys) == nkeys, 'len(keys) != nkeys'
    p = list(map(float, input('Enter success probabilities (p): ').split()))
    assert len(p) == nkeys, 'len(p) != nkeys'
    q = list(map(float, input('Enter failure probabilities (q): ').split()))
    assert len(q) == (nkeys + 1), 'len(q) != nkeys + 1'

    key_array = [Key(val, p1) for val, p1 in zip(keys, p)]
    
    all_permutations = it.permutations(key_array)

    pool = Pool(processes=8)
    
    start_time = time.time()
    result = pool.map_async(solve, [OBSTSolver(keys=perm, q=q[:]) for perm in all_permutations])
    pool.close()
    pool.join()
    end_time = time.time()


    results = result.get(1)

    print("\n*** OBST Calculated ***")
    min_root, min_cost = min(results, key=lambda x: x[1])
    print("Minimum Cost: {}\n".format(min_cost))
    min_root.inorder()
    print("")
    print("Time taken: {} ms.".format((end_time - start_time)*1000))
Example #5
def full_languages_modules_run(langfilter: LIST_STR, modfilter: LIST_STR,
                               brows: LIST_STR) -> None:
    """Run the selected set of modules and locales, logging results,
    and saving a screenshot in case of failure. By default, all of them are run."""
    output = '\n"START: {0}", {1}\n'.format(
        get_time(), ','.join(modfilter).upper())  # header row.
    pool = Pool(cpu_count() * 2)
    try:
        asy = pool.map_async(
            do_locale,
            [(x, LANGS, MOD_STEM, CMOD_STEM, modfilter, b, BROWSERS[b], ARGS)
             for x in langfilter for b in brows])
        while True:
            if asy.ready():
                break
            time.sleep(1)  # Waiting on the pool map blocks KeyboardInterrupts, so don't do that.
    except KeyboardInterrupt:
        pool.terminate()
        raise
    results = asy.get()
    output += '\n'.join(results)  # Each locale's row.
    output += '\n"FINISH: {0}"\n\n'.format(get_time())  # Footer row.
    try:
        with open(RESULTS_FILE, mode='a', encoding='UTF-8') as log:
            log.write(output)
    except PermissionError:
        print('In future, be sure to not leave the log file open.')
        print('That tends to lock it, so now it cannot be written to.')
        print('\n\nNow, you have to try to read raw CSV from a console:\n\n')
        print(output)
Example #6
def main():
    url_list = get_url_list()

    pool = Pool(processes=config.N_WORKERS)
    result = pool.map_async(grab_data, url_list)

    write_md(result.get())
Example #7
    def _execute_import(self, files_to_scan: List[str]) -> (Set[str], Set[str]):
        new_or_changed_files = set()
        undetected_files = set()

        self._files_count = self._count_files_to_scan()
        self.emit_event_main_thread("scan-progress", 0.05)
        self._progress = 0

        pool = Pool()
        while True:
            try:
                job = pool.map_async(import_file, itertools.islice(files_to_scan, CHUNK_SIZE))
            except StopIteration as e:
                log.warning("importer", e, "_execute_import raised a stop iteration.")
                break

            self._wait_for_job_to_complete(job)
            import_result = job.get()

            undetected_files.update({file for file in import_result if isinstance(file, str)})
            media_files = {file for file in import_result if isinstance(file, MediaFile)}
            new_or_changed_files.update((file.path for file in media_files))

            self._progress += CHUNK_SIZE

            if len(media_files) != 0:
                self._database_importer.insert_many(media_files)
            if self._progress >= self._files_count:
                break
        pool.close()

        return new_or_changed_files, undetected_files
Example #8
def async_multiprocess(my_list):
    pool = Pool()

    start_time = time()

    result = pool.map_async(add_one, my_list)
    pool.close()

    print(async_multiprocess.__name__,
          '\nRequired time: {:.6f}\n'.format(time() - start_time),
          result.get()[-5:], end='\n\n')
Example #9
class MultiprocessEvaluator(Evaluator[S]):
    def __init__(self, processes: int = None):
        super().__init__()
        self.pool = Pool(processes)

    def evaluate(self, solution_list: List[S], problem: Problem) -> List[S]:
        # return self.pool.map(functools.partial(evaluate_solution, problem=problem), solution_list)
        result = self.pool.map_async(
            functools.partial(evaluate_solution, problem=problem),
            solution_list)
        return result.get(timeout=1000)
Example #10
    def run(self):
        cases = self.get_test_case()
        # Define a process pool
        pool = Pool(processes=len(cases))

        result.append(pool.map_async(self.init_driver, cases.values()))

        pool.close()
        pool.join()

        while not q.empty():
            comm.Template.set_middle(q.get())
Example #11
def poolHandle(zip, nid):
    if DEBUG_LEVEL == 0:
        p = Pool(80)
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj is not None:
                p.apply_async(handleSub, args=(fobj, nid))
        p.close()
        p.join()
    elif DEBUG_LEVEL == 1:
        p = billiard.Pool()
        _finalizers.append(Finalize(p, p.terminate))
        try:
            p.map_async(handleSub, [(getSubFobj(zip, sub), nid) for sub in zip.namelist()])
            p.close()
            p.join()
        finally:
            p.terminate()
    else:
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj is not None:
                handleSub(fobj, nid)
    zip.close()
Example #12
 def download_chunks(self, max_workers=5):
     print('Will now download chunks.')
     original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
     executor = Pool(max_workers)
     signal.signal(signal.SIGINT, original_sigint_handler)
     try:
         r = executor.map_async(self.get, self.urls)
         result = list(r.get(43200))
         DownloadResultProcessor.process_and_print(result)
     except KeyboardInterrupt:
         executor.terminate()
     else:
         executor.close()
     executor.join()
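The snippet above briefly ignores SIGINT while the pool is created, so the forked workers inherit SIG_IGN and Ctrl-C is delivered only to the parent, which waits in get() with a timeout. A minimal sketch of that pattern with a stand-in fetch worker:

import signal
import time
from multiprocessing import Pool

def fetch(url):
    time.sleep(0.1)  # stand-in for a real download
    return url

if __name__ == '__main__':
    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    pool = Pool(5)  # workers start with SIGINT ignored
    signal.signal(signal.SIGINT, original_sigint_handler)
    try:
        result = pool.map_async(fetch, ['a', 'b', 'c'])
        print(result.get(3600))  # get() with a timeout keeps the parent interruptible
    except KeyboardInterrupt:
        pool.terminate()
    else:
        pool.close()
    pool.join()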
Example #13
 def sprinter(self):
     """
     Called when parallelize is True.
     This function will generate the file names in a directory tree by adding directories to a Queue and
     continuously exploring directories in the Queue until Queue is emptied.
     Significantly faster than crawler method for larger directory trees.
     """
     self._printer('Multiprocess Walk')
     # Loop through directories in case there is more than one (1)
     for directory in self.directory:
         self._get_root_files(
             directory
         )  # Add file within root directory if filepaths is empty
         # acquire the list of paths
         first_level_dirs = next(os.walk(directory))[1]
         for path in first_level_dirs:
             self.unsearched.put((directory, path))
     self._printer('Pool Processing STARTED')
     pool = Pool(self.pool_size)
     pool.map_async(self.parallel_worker, range(self.pool_size))
     pool.close()
     self.unsearched.join()
     self._printer('Pool Processing ENDED')
     return self.filepaths
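The docstring above describes the pattern: directories are pushed onto a shared queue and workers keep pulling from it until the tree is drained. A rough, self-contained sketch of that breadth-first walk using Process and JoinableQueue (the class itself presumably uses a manager-backed queue so it can be shared with Pool workers):

import os
from queue import Empty
from multiprocessing import JoinableQueue, Process, Queue

def walker(unsearched, found):
    while True:
        base, sub = unsearched.get()
        root = os.path.join(base, sub)
        try:
            entries = os.listdir(root)
        except OSError:
            entries = []                       # skip unreadable directories
        for entry in entries:
            full = os.path.join(root, entry)
            if os.path.isdir(full):
                unsearched.put((root, entry))  # explore this directory later
            else:
                found.put(full)                # record the file path
        unsearched.task_done()

if __name__ == '__main__':
    unsearched, found = JoinableQueue(), Queue()
    top = '.'
    for entry in next(os.walk(top))[1]:
        unsearched.put((top, entry))
    for _ in range(4):
        Process(target=walker, args=(unsearched, found), daemon=True).start()
    unsearched.join()  # blocks until every queued directory has been processed
    paths = []
    while True:
        try:
            paths.append(found.get(timeout=0.5))
        except Empty:
            break
    print(len(paths), 'files found')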
Example #14
def main() -> None:
    """Main function"""
    if not args.token:
        logging.error(
            "GitHub Token is missing. Please pass your GitHub token key as a --token=xxxxxx"
        )
        sys.exit(1)
        return

    url_list = get_url_list()

    pool = Pool(processes=args.workers)
    result = pool.map_async(grab_data, url_list)

    write_md(result.get())
Example #15
def execute_all(runs):
    print('starting runs')
    nproc = multiprocessing.cpu_count()
    pool = Pool(processes=nproc)
    result = pool.map_async(execute, runs, chunksize=1)
    start_time = time.time()
    while not result.ready():
        completed = len(runs) - result._number_left
        if completed > 0:
            rate = 60.0 * completed / (time.time() - start_time)
            percent = 100.0 * completed / len(runs)
            print('%04d/%04d %4.1f%% %s %4d/min  ETA: %3.1f\n' %
                  (completed, len(runs), percent, progressbar(
                      percent / 100), rate, 1.0 / rate * result._number_left))
        sys.stdout.flush()
        time.sleep(1)
    print('\ncompleted', len(runs) - result._number_left)
    pool.close()
Example #16
    def image_urls(self):
        """ Iterates over json obj, gets image links
            Creates pool of workers, creates new workers """
        json_obj = self.jsonify()

        for post in json_obj["posts"]:
            if "ext" in post:
                self.total_count.value += 1

        try:
            self.thread_name = self.args.name
        except (KeyError, NameError):
            self.thread_name = json_obj["posts"][0]["sub"].replace(" ", "_")
        else:
            self.thread_name = str(json_obj["posts"][0]["no"])

        for post in json_obj["posts"]:
            if "ext" in post:
                filename = post["tim"] + post["ext"]
                image_url = "https://8ch.net/{board}/src/{file}".format(board=self.board, file=filename)
                self.downloads.append((image_url, filename))
                self.download_image(image_url, filename)

                with self.counter.get_lock():
                    self.counter.value += 1
                    update_progress(self.counter.value, self.total_count.value)

        pool = Pool(self.workers)
        pool_map = pool.map_async(self.download_image, self.downloads)

        try:
            pool_map.get(0xFFFF)
        except KeyboardInterrupt:
            print("Aborting")
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
Example #17
    def image_urls(self):
        """ Iterates over json obj, gets image links
            Creates pool of workers, creates new workers """
        json_obj = self.jsonify()

        for post in json_obj['posts']:
            if 'ext' in post:
                self.total_count.value += 1

        self.thread_name = json_obj['posts'][0]['semantic_url']

        for post in json_obj['posts']:
            if 'ext' in post:
                filename = str(post['tim']) + post['ext']
                image_url = 'https://i.4cdn.org/{board}/{file}'.format(
                    board=self.board, file=filename)
                self.filename.append(filename)
                self.downloads.append(image_url)
                self.download_image(image_url, filename)

                with self.counter.get_lock():
                    self.counter.value += 1
                    update_progress(self.counter.value, self.total_count.value)

        manager = Manager()
        pool_data = manager.list(self.downloads)
        partial_data = partial(self.download_image, pool_data)
        pool = Pool(self.workers)
        pool_map = pool.map_async(partial_data, self.filename)

        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            print("Aborting")
            pool.terminate()
            pool.join()
Example #18
def parallel_variability_analysis(tmodel,
                                  kind='reactions',
                                  proc_num=BEST_THREAD_RATIO):
    """
    WIP.

    :param tmodel:
    :param kind:
    :param proc_num:
    :return:
    """

    objective = tmodel.objective

    if kind == Reaction or kind.lower() in ['reaction', 'reactions']:
        these_vars = tmodel.reactions
    else:
        these_vars = tmodel.get_variables_of_type(kind)

    aggregate_results = {}
    for what in ("min", "max"):
        if proc_num > 1:
            #chunk_size = len(these_vars) //proc_num
            func = partial(_variability_analysis_element, tmodel, sense=what)
            pool = Pool(processes=proc_num, initializer=mute)
            async_result = pool.map_async(func, these_vars)
            pool.close()
            pool.join()
        else:
            print("Multiple threads need to be specified")
            raise (NotImplementedError)
        aggregate_results[what] = async_result.get()
    dataframe_results = pd.DataFrame(aggregate_results)

    return (dataframe_results)
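The version above binds the fixed arguments with functools.partial so map_async only iterates over the variables, once per optimisation sense. A tiny sketch of that idiom with the FVA-specific pieces replaced by a toy evaluate function:

from functools import partial
from multiprocessing import Pool

def evaluate(model_name, sense, var):
    # Toy stand-in for _variability_analysis_element.
    return model_name, sense, var if sense == 'max' else -var

if __name__ == '__main__':
    variables = [1, 2, 3]
    aggregate_results = {}
    with Pool(processes=2) as pool:
        for what in ('min', 'max'):
            func = partial(evaluate, 'toy_model', what)
            aggregate_results[what] = pool.map_async(func, variables).get()
    print(aggregate_results)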
Example #19
 def map_async(self, func, iterable, chunksize=None, callback=None):
   return Pool.map_async(self, LogExceptions(func), iterable, chunksize, callback)
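LogExceptions itself is not shown in this snippet. A common shape for such a wrapper, sketched here as an assumption rather than the original implementation, is a picklable callable that logs the full traceback inside the worker before re-raising, since the traceback is otherwise invisible until AsyncResult.get() is called:

import logging
import traceback

class LogExceptions(object):
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            logging.error(traceback.format_exc())  # logged inside the worker process
            raise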
Example #20
class LLTInf(object):
    """Obtains a decision tree that classifies the given labeled traces.

    traces : a Traces object
             The set of labeled traces to use as training set
    depth : integer
            Maximum depth to be reached
    optimize_impurity : function. Optional, defaults to optimize_inf_gain
                        A function that obtains the best parameters for a test
                        in a given node according to some impurity measure. The
                        function should have the following prototype:
                            optimize_impurity(traces, primitive, rho, disp) :
                                (primitive, impurity)
                        where traces is a Traces object, primitive is a depth 2
                        STL formula, rho is a list with the robustness degree of
                        each trace up until this node in the tree and disp is a
                        boolean that switches output display. The impurity
                        returned should be so that the best impurity is the
                        minimum one.
    stop_condition : list of functions. Optional, defaults to [perfect_stop]
                     list of stopping conditions. Each stopping condition is a
                     function from a dictionary to boolean. The dictionary
                     contains all the information passed recursively during the
                     construction of the decision tree (see arguments of
                     lltinf_).
    disp : a boolean
           Switches display of debugging output

    Returns a DTree object.

    TODO: Fix comments

    """
    def __init__(
        self,
        depth=1,
        primitive_factory=llt.make_llt_primitives,
        optimize_impurity=impurity.ext_inf_gain,
        stop_condition=None,
        redo_after_failed=1,
        optimizer_args=None,
        times=None,
        fallback_impurity=impurity.inf_gain,
        log=False,
    ):
        self.depth = depth
        self.primitive_factory = primitive_factory
        self.optimize_impurity = optimize_impurity
        self.fallback_impurity = fallback_impurity
        if stop_condition is None:
            self.stop_condition = [perfect_stop]
        else:
            self.stop_condition = stop_condition
        if optimizer_args is None:
            optimizer_args = {}
        self.optimizer_args = optimizer_args
        self.times = times
        self.interpolate = times is not None
        if self.interpolate and len(self.times) > 1:
            self.tinter = self.times[1] - self.times[0]
        else:
            self.tinter = None
        self.tree = None
        self.redo_after_failed = redo_after_failed
        self._partial_add = 0
        self.log = log
        if "workers" not in self.optimizer_args:
            self.pool = Pool(initializer=_pool_initializer)

            def pool_map(func, iterable):
                try:
                    return self.pool.map_async(func, iterable).get(timeout=120)
                except KeyboardInterrupt:
                    self.pool.terminate()
                    self.pool.join()
                    raise KeyboardInterrupt()

            self.pool_map = pool_map
            self.optimizer_args["workers"] = self.pool_map

    def __del__(self):
        if hasattr(self, "pool"):
            self.pool.terminate()
            self.pool.join()

    def __exit__(self):
        if hasattr(self, "pool"):
            self.pool.terminate()
            self.pool.join()

    def fit(self, traces, disp=False):
        np.seterr(all="ignore")
        self.tree = self._lltinf(traces, None, self.depth, disp=disp)
        return self

    def fit_partial(self, traces, disp=False):
        if self.tree is None:
            return self.fit(traces, disp=disp)
        else:
            preds = self.predict(traces.signals)
            failed = set()
            for i in range(len(preds)):
                leaf = self.tree.add_signal(traces.signals[i],
                                            traces.labels[i], self.interpolate,
                                            self.tinter)
                if preds[i] != traces.labels[i]:
                    failed.add(leaf)

            # logger.debug("Failed set: {}".format(failed))

            self._partial_add += len(failed)
            if self._partial_add // self.redo_after_failed > 0:
                # logger.debug("Redoing tree")
                self._partial_add = 0
                return self.fit(self.tree.traces, disp=disp)
            else:
                for leaf in failed:
                    # TODO don't redo whole node, only leaf
                    tree = self._lltinf(
                        leaf.traces,
                        leaf.robustness,
                        self.depth - leaf.level(),
                        disp=disp,
                    )
                    old_tree = leaf.copy()
                    leaf.set_tree(tree)

                # FIXME only for perfect_stop
                preds = self.predict(traces.signals)
                if not np.array_equal(preds, traces.labels):
                    self._partial_add = 0
                    return self.fit(self.tree.traces, disp=disp)
                return self

    def predict(self, signals):
        if self.tree is not None:
            return np.array([
                self.tree.classify(s, self.interpolate, self.tinter)
                for s in signals
            ])
        else:
            raise ValueError("Model not fit")

    def get_formula(self):
        if self.tree is not None:
            return self.tree.get_formula()
        else:
            raise ValueError("Model not fit")

    def _debug(self, *args):
        if self.log:
            logger.debug(*args)

    def _lltinf(self, traces, rho, depth, disp=False, override_impurity=None):
        """Recursive call for the decision tree construction.

        See lltinf for information on similar arguments.

        rho : list of numerics
            List of robustness values for each trace up until the current node
        depth : integer
                Maximum depth to be reached. Decrements for each recursive call
        """
        # Stopping condition
        if any(
            [stop(self, traces, rho, depth) for stop in self.stop_condition]):
            return None

        # Find primitive using impurity measure
        self._debug(
            f"Creating primitives at depth {depth} over {len(traces)} traces")
        primitives = self.primitive_factory(traces.signals, traces.labels)
        if override_impurity is None:
            impurity = self.optimize_impurity
        else:
            impurity = override_impurity
        self._debug(
            f"Finding best primitive at depth {depth} over {len(traces)} traces"
        )
        primitive, impurity = _find_best_primitive(
            traces,
            primitives,
            rho,
            impurity,
            disp,
            self.optimizer_args,
            times=self.times,
            interpolate=self.interpolate,
            tinter=self.tinter,
        )
        if disp:
            print("Best: {} ({})".format(primitive, impurity))
        self._debug(f"Best primitive found: {primitive} (imp: {impurity})")

        # Classify using best primitive and split into groups
        prim_rho = [
            primitive.score(model)
            for model in traces.models(self.interpolate, self.tinter)
        ]
        if rho is None:
            rho = [np.inf for i in traces.labels]
        tree = DTree(primitive, traces, rho)

        def split(prim_rho):
            sat, unsat = [], []
            for i, rho in enumerate(prim_rho):
                if rho >= 0:
                    sat.append(i)
                else:
                    unsat.append(i)

            return sat, unsat

        # [prim_rho, rho, signals, label]
        # sat_, unsat_ = split_groups(
        #     list(zip(prim_rho, rho, *traces.as_list())), lambda x: x[0] >= 0
        # )
        sat_, unsat_ = split(prim_rho)
        self._debug(f"Split: {len(sat_)}/{len(unsat_)}")

        # pure_wrong = all([t[3] <= 0 for t in sat_]) or all([t[3] >= 0 for t in unsat_])
        # pure_right = all([t[3] >= 0 for t in sat_]) or all([t[3] <= 0 for t in unsat_])
        sat_right = len([i for i in sat_ if traces.labels[i] >= 0])
        sat_wrong = len(sat_) - sat_right
        unsat_right = len([i for i in unsat_ if traces.labels[i] <= 0])
        unsat_wrong = len(unsat_) - unsat_right
        # Switch sat and unsat if labels are wrong. No need to negate prim rho since
        # we use it in absolute value later
        if sat_right * unsat_right == 0 or (sat_wrong * unsat_wrong != 0
                                            and sat_right < unsat_wrong):
            self._debug(f"Inverting primitive")

            sat_, unsat_ = unsat_, sat_
            tree.primitive.negate()

        # No further classification possible
        if len(sat_) == 0 or len(unsat_) == 0:
            self._debug("No further classification possible")
            if override_impurity is None:
                self._debug("Attempting to classify using impurity fallback")
                return self._lltinf(
                    traces,
                    rho,
                    depth,
                    disp=disp,
                    override_impurity=self.fallback_impurity,
                )
            else:
                return None

        # Redo data structures
        sat_traces, unsat_traces = [
            traces.subset(traces, idxs) for idxs in [sat_, unsat_]
        ]
        sat_rho, unsat_rho = [
            np.amin(
                [np.abs([prim_rho[i] for i in idxs]), [rho[i]
                                                       for i in idxs]], 0)
            for idxs in [sat_, unsat_]
        ]
        # sat, unsat = [
        #     (Traces(*group[2:]), np.amin([np.abs(group[0]), group[1]], 0))
        #     for group in [list(zip(*sat_)), list(zip(*unsat_))]
        # ]

        # Recursively build the tree
        tree.left = self._lltinf(sat_traces, sat_rho, depth - 1, disp=disp)
        tree.right = self._lltinf(unsat_traces,
                                  unsat_rho,
                                  depth - 1,
                                  disp=disp)

        return tree
Example #21
class TqaCore(object):
    def __init__(self,
                 ranker_opts,
                 reader_opts,
                 reuser_opts,
                 num_workers=None,
                 online=True):
        start = time.time()
        self.online = online
        if self.online:
            self.session = requests.Session()
            self.adapter = HTTPAdapter(pool_connections=5,
                                       pool_maxsize=5,
                                       max_retries=5)
            self.session.mount('http://', self.adapter)
            self.session.mount('https://', self.adapter)
            self.header = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
            }
            for key, value in self.header.items():
                capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
                webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
            self.browser = webdriver.PhantomJS(
                executable_path='./phantomjs',
                service_log_path=os.path.devnull)

        logger.info('Initializing reuser...')
        bin_path = reuser_opts.get('embedded_corpus_bin_path')
        threshold = reuser_opts.get('threshold')
        self.matcher = FastTextMatcher(bin_path, threshold)

        logger.info('Initializing document rankers...')
        tfidf_model_paths = ranker_opts.get('tfidf_model_paths')
        self.tfidf_rank_k = ranker_opts.get('tfidf_rank_k',
                                            DEFAULTS['tfidf_rank_k'])
        self.rankers = {}
        for tfidf_model_path in tfidf_model_paths:
            db_table = os.path.basename(tfidf_model_path).split("_")[0]
            self.rankers[db_table] = TfidfRanker(tfidf_model_path)

        logger.info('Initializing document reader...')
        model_path = reader_opts.get('reader_model_path')
        self.reader = load_model(model_path, new_args=None)
        embedded_corpus_path = reader_opts.get('embedded_corpus_path', None)
        if embedded_corpus_path:
            logger.info('Expanding dictionary...')
            words = get_embedded_words(embedded_corpus_path)
            added_words, _ = self.reader.expand_dictionary(words, chars=None)
            self.reader.load_embeddings(added_words, embedded_corpus_path)
        use_cuda = reader_opts.get('use_cuda',
                                   None) and torch.cuda.is_available()
        if use_cuda:
            self.reader.cuda()
        self.top_k_answers = reader_opts.get('top_k_answers',
                                             DEFAULTS['top_k_answers'])

        logger.info('Initializing tokenizer and retriever...')
        annotators = set()
        if self.reader.args.use_pos:
            annotators.add('pos')
        if self.reader.args.use_lemma:
            annotators.add('lemma')
        if self.reader.args.use_ner:
            annotators.add('ner')
        tokenizer_opts = {
            'language': self.reader.args.language,
            'annotators': annotators,
            # 'timeout': 10000,
        }
        self.num_workers = num_workers
        self.pool = Pool(num_workers,
                         initializer=pool_init,
                         initargs=(tokenizer_opts, ))

        end = time.time()
        logger.info('Server start elapse: {min}min {sec}sec'.format(
            min=int(end - start) // 60, sec=int(end - start) % 60))

    def reuse(self, questions):
        ids = []
        titles = []
        descriptions = []
        for question in questions:
            ids.append(question['id'])
            titles.append(question['title'])
            descriptions.append(question['desc'])

        # q_tokens contains both the title and the description
        q_tokens = self.pool.map_async(
            tokenize,
            [titles[i] + ' ' + descriptions[i] for i in range(0, len(titles))])
        q_tokens = q_tokens.get()
        score, index = self.matcher.match(q_tokens, titles, descriptions)
        return ids[index], score

    def answer(self, question_title, question_all):
        start_time = time.time()
        logger.info('Processing question: %s...' % question_title)
        logger.info('Retrieving top %d documents...' % self.tfidf_rank_k)

        results = None
        if self.online:
            try:
                results = self.online_rank(question_title=question_title,
                                           question_all=question_all)
            except:
                results = None

        if not results:
            with ThreadPool(self.num_workers) as threads:
                _rank = partial(self.rank,
                                question_title=question_title,
                                question_all=question_all)
                results = threads.map(_rank, self.rankers.keys())

        logger.info('Answer elapse = %d' % (time.time() - start_time))
        return results

    def answerOne(self, question_title, question_all, d_tokens, d_ids):
        logger.info("Tokenizing question...")
        q_tokens = self.pool.map_async(tokenize, [question_title])
        q_tokens = q_tokens.get()

        examples = []
        for i in range(len(d_tokens)):
            examples.append({
                'id': d_ids[i],
                'qtext': q_tokens[0].words(),
                'qlemma': q_tokens[0].lemma(),
                'dtext': d_tokens[i].words(),
                'dlemma': d_tokens[i].lemma(),
                'dpos': d_tokens[i].pos(),
                'dner': d_tokens[i].ner(),
            })

        logger.info("Batchify...")
        examples_in_batch = utils.batchify([
            utils.vectorize(example, self.reader, single_answer=False)
            for example in examples
        ])
        start, end, score = self.reader.predict(examples_in_batch,
                                                self.top_k_answers)

        # Build the answers from the start and end positions
        results = []
        for i in range(len(start)):
            print(d_ids[i])
            for j in range(len(start[i])):
                answer = d_tokens[i].slice(start[i][j],
                                           end[i][j] + 1).untokenize()
                text = d_tokens[i].answer_sentence(start[i][j],
                                                   end[i][j] + 1).untokenize()
                results.append({
                    'score': score[i][j].item(),
                    'answer': answer,
                    'text': text,
                    'id': d_ids[i]
                })

        return results

    def online_rank(self, question_title, question_all):
        question_title = re.sub(
            '[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+', "",
            question_title)
        url = 'https://so.csdn.net/so/search/s.do?q=' + quote(
            question_title) + '&t=blog'
        self.browser.get(url)
        html = self.browser.page_source
        soup = BeautifulSoup(html, 'html.parser')
        link = soup.find_all('dl', {"class": "search-list J_search"})[0] \
            .find('dd', {'class': 'search-link'}) \
            .find('a')['href']
        logger.info('First Blog: %s' % link)

        resp = self.session.request('GET', link, params=None)
        soup = BeautifulSoup(resp.content, "html.parser")
        title = soup.find('h1', {'class': 'title-article'}).get_text()
        content = soup.find(id="article_content").get_text()
        content = re.sub(r"\t+|\n+|\r+", "", content)  # remove non-space whitespace characters
        content = re.sub(r"\s{2,}", " ", content)
        # print(content)

        logger.info("Tokenizing document...")
        d_tokens = self.pool.map_async(tokenize, [content])
        d_tokens = d_tokens.get()

        return self.answerOne(question_title, question_all, d_tokens,
                              ['blog@' + link + '@' + title])

    def rank(self, db_table, question_title, question_all):
        logger.info("Finding closest documents...")
        result = [
            self.rankers[db_table].closest_docs(query=question_all,
                                                k=self.tfidf_rank_k)
        ]
        documents_ids, documents_scores = zip(*result)
        documents_ids = documents_ids[0]
        documents_scores = documents_scores[0]
        print(db_table, documents_ids, documents_scores)

        if len(documents_ids) == 0:
            return None

        logger.info("Tokenizing document...")
        _build_tokens = partial(build_tokens, db_table=db_table)
        d_rank_k_tokens = self.pool.map_async(_build_tokens, documents_ids)
        d_rank_k_tokens = d_rank_k_tokens.get()

        return self.answerOne(question_title, question_all, d_rank_k_tokens,
                              documents_ids)
Example #22
        }, {
            'type': 'ineq',
            'fun': con_b3
        })
        res = minimize(turbine_opts,
                       x0,
                       method='SLSQP',
                       bounds=bnds,
                       constraints=cons)
        print(-res['fun'], num)

        return res

    p = Pool(processes=1)
    try:
        results = p.map_async(optimize, variables).get(9999999)
    except KeyboardInterrupt:
        p.terminate()
        sys.exit('KeyboardInterrupt')
    p.close()
    p.join()

    hours = int((time.time() - start_time) / 3600)
    minutes = int((time.time() - start_time - hours * 3600) / 60)
    seconds = time.time() - start_time - hours * 3600 - minutes * 60

    print('Turbine calculations done in', hours, 'h', minutes, 'm', seconds,
          's')

    results.sort(key=lambda x: -x['fun'])
Example #23
def compress_cso(fname_in, fname_out, level):
	fin, fout = open_input_output(fname_in, fname_out)
	fin.seek(0, os.SEEK_END)
	total_bytes = fin.tell()
	fin.seek(0)

	header_size, block_size, ver, align = 0x18, 0x800, 1, DEFAULT_ALIGN
	magic = ZISO_MAGIC if USE_LZ4 else CISO_MAGIC

	# We have to use alignment on any CSO file larger than 2 GB, because the MSB of each index entry is the plain-block indicator.
	# Without alignment, the index value itself can exceed 2 GB and clobber that indicator bit.
	if total_bytes >= 2 ** 31 and align == 0:
		align = 1

	header = generate_cso_header(magic, header_size, total_bytes, block_size, ver, align)
	fout.write(header)

	total_block = total_bytes // block_size
	index_buf = [0 for i in range(total_block + 1)]

	fout.write(b"\x00\x00\x00\x00" * len(index_buf))
	show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level)

	write_pos = fout.tell()
	percent_period = total_block // 100
	percent_cnt = 0

	if MP:
		pool = Pool()
	else:
		pool = None

	block = 0
	while block < total_block:
		if MP:
			percent_cnt += min(total_block - block, MP_NR)
		else:
			percent_cnt += 1

		if percent_cnt >= percent_period and percent_period != 0:
			percent_cnt = 0

			if block == 0:
				print("compress %3d%% avarage rate %3d%%\r" % (
					block // percent_period
					, 0), file=sys.stderr, end="")
			else:
				print("compress %3d%% avarage rate %3d%%\r" % (
					block // percent_period
					, 100 * write_pos // (block * 0x800)), file=sys.stderr, end="")

		if MP:
			iso_data = [(fin.read(block_size), level) for i in range(min(total_block - block, MP_NR))]
			cso_data_all = pool.map_async(zip_compress_mp, iso_data).get(9999999)

			for i in range(len(cso_data_all)):
				write_pos = set_align(fout, write_pos, align)
				index_buf[block] = write_pos >> align
				cso_data = cso_data_all[i]

				if 100 * len(cso_data) // len(iso_data[i][0]) >= min(COMPRESS_THREHOLD, 100):
					cso_data = iso_data[i][0]
					index_buf[block] |= 0x80000000  # Mark as plain
				elif index_buf[block] & 0x80000000:
					print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes")
					sys.exit(1)

				fout.write(cso_data)
				write_pos += len(cso_data)
				block += 1
		else:
			iso_data = fin.read(block_size)

			try:
				cso_data = zip_compress(iso_data, level)
			except zlib.error as e:
				print("%d block: %s" % (block, e))
				sys.exit(-1)

			write_pos = set_align(fout, write_pos, align)
			index_buf[block] = write_pos >> align

			if 100 * len(cso_data) // len(iso_data) >= COMPRESS_THREHOLD:
				cso_data = iso_data
				index_buf[block] |= 0x80000000  # Mark as plain
			elif index_buf[block] & 0x80000000:
				print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes")
				sys.exit(1)

			fout.write(cso_data)
			write_pos += len(cso_data)
			block += 1

	# Last position (total size)
	index_buf[block] = write_pos >> align

	# Update index block
	fout.seek(len(header))
	for i in index_buf:
		idx = pack('I', i)
#		assert(len(idx) == 4)
		fout.write(idx)

	print("ciso compress completed , total size = %8d bytes , rate %d%%" % (write_pos, (write_pos * 100 // total_bytes)))

	fin.close()
	fout.close()
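The multiprocessing branch above reads a batch of MP_NR blocks, compresses them in parallel with map_async, and writes the results back in submission order. A stripped-down sketch of that batching pattern with plain zlib and synthetic blocks:

import zlib
from multiprocessing import Pool

def compress_block(args):
    data, level = args
    return zlib.compress(data, level)

if __name__ == '__main__':
    blocks = [bytes([i]) * 0x800 for i in range(64)]  # stand-in ISO blocks
    batch_size = 16                                   # plays the role of MP_NR
    out = bytearray()
    with Pool() as pool:
        for start in range(0, len(blocks), batch_size):
            args = [(b, 9) for b in blocks[start:start + batch_size]]
            for compressed in pool.map_async(compress_block, args).get():
                out += compressed                     # preserve on-disk order
    print(len(out), 'compressed bytes')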
Example #24
    for trial in xrange(trials):
        for budget in budgets:
            run = get_run(budget, algo)
            runs.append(run)

random.shuffle(runs)


def execute(run):
    # print run
    output = subprocess.check_output(run['arguments'])
    f = open(run['filename'], 'a')
    f.write(output)
    f.close()


pool = Pool(processes=mp.cpu_count())
result = pool.map_async(execute, runs, chunksize=1)
start_time = time.time()
while not result.ready():
    completed = len(runs) - result._number_left
    if completed > 0:
        eta = (time.time() - start_time) / completed * result._number_left / 60
        print '%6d left, ETA: %4.1f minutes' % (result._number_left, eta)
    sys.stdout.flush()
    time.sleep(1)

pool.close()

#nice parallel -j 6 --"command1""command2"
Example #25
def raster2pyramid(
    input_file,
    output_dir,
    options
    ):
    """
    Creates a tile pyramid out of an input raster dataset.
    """
    pyramid_type = options["pyramid_type"]
    scale_method = options["scale_method"]
    output_format = options["output_format"]
    resampling = options["resampling"]
    zoom = options["zoom"]
    bounds = options["bounds"]
    overwrite = options["overwrite"]

    # Prepare process parameters
    minzoom, maxzoom = _get_zoom(zoom, input_file, pyramid_type)
    process_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "tilify.py"
    )

    with rasterio.open(input_file, "r") as input_raster:
        output_bands = input_raster.count
        input_dtype = input_raster.dtypes[0]
        output_dtype = input_raster.dtypes[0]
        nodataval = input_raster.nodatavals[0]
        if not nodataval:
            nodataval = 0
        if output_format == "PNG":
            if output_bands > 3:
                output_bands = 3
                output_dtype = 'uint8'
        scales_minmax = ()
        if scale_method == "dtype_scale":
            for index in range(1, output_bands+1):
                scales_minmax += (DTYPE_RANGES[input_dtype], )
        elif scale_method == "minmax_scale":
            for index in range(1, output_bands+1):
                band = input_raster.read(index)
                scales_minmax += ((band.min(), band.max()), )
        elif scale_method == "crop":
            for index in range(1, output_bands+1):
                scales_minmax += ((0, 255), )
        if input_dtype == "uint8":
            scale_method = None
            scales_minmax = ()
            for index in range(1, output_bands+1):
                scales_minmax += ((None, None), )

    # Create configuration
    config = {}
    config.update(
        process_file=process_file,
        output={
            "path": output_dir,
            "format": output_format,
            "type": pyramid_type,
            "bands": output_bands,
            "dtype": output_dtype
            },
        scale_method=scale_method,
        scales_minmax=scales_minmax,
        input_files={"raster": input_file},
        config_dir=os.getcwd(),
        process_minzoom=minzoom,
        process_maxzoom=maxzoom,
        nodataval=nodataval,
        resampling=resampling,
        bounds=bounds,
        pixelbuffer=5,
        baselevel={"zoom": maxzoom, "resampling": resampling}
    )

    LOGGER.info("preparing process ...")

    try:
        mapchete = Mapchete(
            MapcheteConfig(
                config,
                zoom=zoom,
                bounds=bounds
            )
        )
    except PyCompileError as error:
        print(error)
        return
    except:
        raise

    # Prepare output directory and logging
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logging.config.dictConfig(get_log_config(mapchete))

    for zoom in reversed(range(minzoom, maxzoom+1)):
        # Determine work tiles and run
        work_tiles = mapchete.get_work_tiles(zoom)
        func = partial(_worker,
            mapchete=mapchete,
            overwrite=overwrite
        )
        pool = Pool()
        try:
            pool.map_async(func, work_tiles)
            pool.close()
        except KeyboardInterrupt:
            LOGGER.info(
                "Caught KeyboardInterrupt, terminating workers"
                )
            pool.terminate()
            break
        except:
            raise
        finally:
            pool.close()
            pool.join()
Example #26
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy,
                          _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask,
                                                   _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory,
                                                _BaseList]] = None,
                       *args,
                       **kwargs) -> None:
        super(ProcessPoolStrategy,
              self).initialization(queue_tasks=queue_tasks,
                                   features=features,
                                   *args,
                                   **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=__pool_initializer,
                                     initargs=__pool_initargs)

    def apply(self,
              tasks_size: int,
              function: Callable,
              args: Tuple = (),
              kwargs: Dict = {}) -> None:
        self.reset_result()
        __process_running_result = None

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=function,
                                            args=args,
                                            kwds=kwargs)
                for _ in range(tasks_size)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply(self,
                    tasks_size: int,
                    function: Callable,
                    args: Tuple = (),
                    kwargs: Dict = {},
                    callback: Callable = None,
                    error_callback: Callable = None) -> None:
        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def apply_with_iter(self,
                        functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        __process_running_result = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=_func,
                                            args=_args,
                                            kwds=_kwargs) for _func, _args,
                _kwargs in zip(functions_iter, args_iter, kwargs_iter)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply_with_iter(
            self,
            functions_iter: List[Callable],
            args_iter: List[Tuple] = None,
            kwargs_iter: List[Dict] = None,
            callback_iter: List[Callable] = None,
            error_callback_iter: List[Callable] = None) -> None:
        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter,
                error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def map(self,
            function: Callable,
            args_iter: IterableType = (),
            chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map(self,
                  function: Callable,
                  args_iter: IterableType = (),
                  chunksize: int = None,
                  callback: Callable = None,
                  error_callback: Callable = None) -> None:
        self.reset_result()

        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def map_by_args(self,
                    function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map_by_args(self,
                          function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None,
                          callback: Callable = None,
                          error_callback: Callable = None) -> None:
        self.reset_result()
        _map_result = self._Processors_Pool.starmap_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)
        _process_running_result = _map_result.get()
        _process_run_successful = _map_result.successful()

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=None)

    def imap(self,
             function: Callable,
             args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap_unordered(self,
                       function: Callable,
                       args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_results.append(_pool_result)
        return _pool_results
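
The imap and imap_unordered wrappers above differ only in the order results come back: imap yields results in input order, while imap_unordered yields them as workers finish. A minimal standalone sketch of that difference with the standard-library Pool (independent of the class above; the worker and timings are illustrative):

from multiprocessing import Pool
import time

def slow_square(n):
    time.sleep(0.1 * (5 - n))  # later inputs finish sooner
    return n * n

if __name__ == "__main__":
    with Pool(4) as pool:
        ordered = list(pool.imap(slow_square, range(5)))            # always [0, 1, 4, 9, 16]
        unordered = list(pool.imap_unordered(slow_square, range(5)))  # completion order
    print(ordered)
    print(unordered)
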
Example #27
0
    def map_async(self, func, args=(), kwargs={}, callback=None):
        results = NativePool.map_async(self,
                                       MultiprocessingLogExceptions(func),
                                       args, kwargs, callback)
        self.results.extend(results)
        return results
Example #28
0
def compress_cso(fname_in, fname_out, level):
    fin, fout = open_input_output(fname_in, fname_out)
    fin.seek(0, os.SEEK_END)
    total_bytes = fin.tell()
    fin.seek(0)

    header_size, block_size, ver, align = 0x18, 0x800, 1, DEFAULT_ALIGN
    magic = ZISO_MAGIC if USE_LZ4 else CISO_MAGIC

    # Force alignment for any CSO file larger than 2 GB: the MSB of each index
    # entry is the "plain block" indicator, so without alignment an offset past
    # 2 GB would overflow into that bit and corrupt the indicator
    if total_bytes >= 2 ** 31 and align == 0:
        align = 1

    header = generate_cso_header(magic, header_size, total_bytes, block_size, ver, align)
    fout.write(header)

    total_block = total_bytes // block_size
    index_buf = [0 for i in range(total_block + 1)]

    fout.write(b"\x00\x00\x00\x00" * len(index_buf))
    show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level)

    write_pos = fout.tell()
    percent_period = total_block // 100
    percent_cnt = 0

    if MP:
        pool = Pool()
    else:
        pool = None

    block = 0
    while block < total_block:
        if MP:
            percent_cnt += min(total_block - block, MP_NR)
        else:
            percent_cnt += 1

        if percent_cnt >= percent_period and percent_period != 0:
            percent_cnt = 0

            if block == 0:
                print("compress %3d%% average rate %3d%%\r" % (block // percent_period, 0), file=sys.stderr, end="")
            else:
                print(
                    "compress %3d%% average rate %3d%%\r"
                    % (block // percent_period, 100 * write_pos // (block * 0x800)),
                    file=sys.stderr,
                    end="",
                )

        if MP:
            iso_data = [(fin.read(block_size), level) for i in range(min(total_block - block, MP_NR))]
            cso_data_all = pool.map_async(zip_compress_mp, iso_data).get(9999999)

            for i in range(len(cso_data_all)):
                write_pos = set_align(fout, write_pos, align)
                index_buf[block] = write_pos >> align
                cso_data = cso_data_all[i]

                if 100 * len(cso_data) // len(iso_data[i][0]) >= min(COMPRESS_THREHOLD, 100):
                    cso_data = iso_data[i][0]
                    index_buf[block] |= 0x80000000  # Mark as plain
                elif index_buf[block] & 0x80000000:
                    print(
                        "Align error: you have to increase align by 1, or the CFW won't be able to read offsets above 2 ** 31 bytes"
                    )
                    sys.exit(1)

                fout.write(cso_data)
                write_pos += len(cso_data)
                block += 1
        else:
            iso_data = fin.read(block_size)

            try:
                cso_data = zip_compress(iso_data, level)
            except zlib.error as e:
                print("%d block: %s" % (block, e))
                sys.exit(-1)

            write_pos = set_align(fout, write_pos, align)
            index_buf[block] = write_pos >> align

            if 100 * len(cso_data) // len(iso_data) >= COMPRESS_THREHOLD:
                cso_data = iso_data
                index_buf[block] |= 0x80000000  # Mark as plain
            elif index_buf[block] & 0x80000000:
                print(
                    "Align error: you have to increase align by 1, or the CFW won't be able to read offsets above 2 ** 31 bytes"
                )
                sys.exit(1)

            fout.write(cso_data)
            write_pos += len(cso_data)
            block += 1

    # Last position (total size)
    index_buf[block] = write_pos >> align

    # Update index block
    fout.seek(len(header))
    for i in index_buf:
        idx = pack("I", i)
        # assert len(idx) == 4
        fout.write(idx)

    print(
        "ciso compress completed, total size = %8d bytes, rate %d%%" % (write_pos, (write_pos * 100 // total_bytes))
    )

    fin.close()
    fout.close()
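
The MP branch above reads MP_NR blocks at a time, compresses the whole batch in one map_async call, and writes the results back in submission order, which map_async guarantees. A simplified sketch of just that batching pattern using zlib only; the block size, batch size, and names here are illustrative assumptions, not the tool's actual interface:

import zlib
from multiprocessing import Pool

BLOCK_SIZE = 0x800   # 2 KiB blocks, as in the example above
BATCH = 64           # number of blocks compressed per map_async call (assumed)

def compress_block(data):
    return zlib.compress(data, 9)

def compress_file(src, dst):
    with open(src, "rb") as fin, open(dst, "wb") as fout, Pool() as pool:
        while True:
            blocks = [b for b in (fin.read(BLOCK_SIZE) for _ in range(BATCH)) if b]
            if not blocks:
                break
            # results come back in the same order the blocks were submitted
            for compressed in pool.map_async(compress_block, blocks).get():
                fout.write(compressed)
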
Example #29
0
    # Finance       https://news.sina.com.cn/roll/#pageid=153&lid=2516&k=&num=50&page=1
    # Technology    https://news.sina.com.cn/roll/#pageid=153&lid=2515&k=&num=50&page=1
    # Military      https://news.sina.com.cn/roll/#pageid=153&lid=2514&k=&num=50&page=1
    # Entertainment https://news.sina.com.cn/roll/#pageid=153&lid=2513&k=&num=50&page=1
    # Lottery       http://sports.sina.com.cn/roll/#pageid=13&lid=581&k=&num=50&page=1
    # ------------------ Modify the parameters below as needed ----------------
    start_page = 1
    end_page = 2
    # home_path = 'test_data/'
    home_path = 'train_data/'
    # ------------------ Modify the parameters above as needed ----------------
    params = [  # pageid, lid, start_page (inclusive), end_page (exclusive), save_path
        ('153', '2513', start_page, end_page, home_path + '娱乐.csv'),
        ('153', '2514', start_page, end_page, home_path + '军事.csv'),
        ('153', '2515', start_page, end_page, home_path + '科技.csv'),
        ('153', '2516', start_page, end_page, home_path + '财经.csv'),
        ('153', '2517', start_page, end_page, home_path + '股市.csv'),
        ('13', '585', start_page, end_page, home_path + '赛车.csv'),
        ('13', '571', start_page, end_page, home_path + '篮球.csv'),
        ('13', '572', start_page, end_page, home_path + '足球.csv'),
        ('13', '583', start_page, end_page, home_path + '跑步.csv'),
        ('13', '581', start_page, end_page, home_path + '彩票.csv'),
    ]
    print('program start...')
    start_time = time.time()
    pool = Pool(processes=multiprocessing.cpu_count() - 1)  # start multiple processes (they do not necessarily all run at the same time)
    pool.map_async(start_spider, params)
    pool.close()
    pool.join()
    print('program run time:', time.time() - start_time, 'seconds')
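
Note that the AsyncResult returned by map_async is discarded above, so an exception raised inside start_spider would be silently swallowed. A minimal sketch of the same submit/close/join pattern that keeps the result handle and wires up error_callback; the worker and job list here are illustrative stand-ins, not the spider's real code:

from multiprocessing import Pool

def crawl(args):  # hypothetical stand-in for start_spider
    pageid, lid, start, end, path = args
    if end < start:
        raise ValueError("bad page range for lid %s" % lid)

def report_error(exc):  # called once with the exception if any task fails
    print("worker failed:", exc)

if __name__ == "__main__":
    jobs = [("153", "2513", 1, 2, "entertainment.csv")]  # illustrative
    with Pool() as pool:
        result = pool.map_async(crawl, jobs, error_callback=report_error)
        pool.close()
        pool.join()
        result.get()  # re-raises a worker exception here, if any occurred
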
Example #30
0
import random
from multiprocessing.pool import Pool
import multiproc_defs as defs

if __name__ == '__main__':
    pool = Pool()
    to_factor = [random.randint(100000, 50000000) for i in range(20)]
    results = pool.map_async(defs.prime_factors, to_factor)
    # poll with a small timeout instead of spinning: wait(timeout=0) returns immediately
    while not results.ready():
        results.wait(timeout=0.5)
    for value, factors in zip(to_factor, results.get()):
        print("The factors of {} are {}".format(value, factors))
Example #31
0
    def map_async(self, func, iterable, chunksize=None, callback=None):
        return Pool.map_async(self, LogExceptions(func), iterable, chunksize,
                              callback)
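
LogExceptions (and the MultiprocessingLogExceptions used in the similar wrappers above) is not shown in these snippets. One plausible implementation, offered only as an assumption about what such a wrapper does: a picklable callable that logs the worker's traceback before re-raising, so failures inside the pool show up in the parent's log instead of disappearing until get() is called:

import logging
import traceback

class LogExceptions:
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            # log the full traceback in the worker, then re-raise so the
            # parent still sees the failure through the AsyncResult
            logging.error(traceback.format_exc())
            raise
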
Example #32
0
    start_page = 0
    end_page = 1
    opts, args = getopt.getopt(sys.argv[1:], "hs:e:")

    for cmd, arg in opts:
        if cmd == "-s":
            start_page = int(arg)
        if cmd == "-e":
            end_page = int(arg) + 1

    print("start_page", start_page)
    print("end_page", end_page)

    write_page_file(start_page, end_page)

    pic_list = []
    for i in range(start_page, end_page):
        pic_list += get_pic_url_list(i)

    print('Fetching done, starting to download images...')

    start_time = time.time()
    pool = Pool(10)
    pool.map_async(download_pic, pic_list)
    pool.close()
    pool.join()

    print(error_page)
    print(f'Download done\n Time taken: {time.time() - start_time} seconds')
Example #33
0

def work(a):
    pass


if __name__ == '__main__':
    pool = Pool(3)
    for i in range(10):
        result = pool.apply(work, (i, ))
        print(result)
    print("apply all done")

    ###########################################
    results = []
    for i in range(10):
        result = pool.apply_async(work, (i, ))
        results.append(result)
    for result in results:
        print(result.get())
    print("apply_async all done")
    ###########################################
    results = pool.map(work, range(10))
    print(results)
    ###########################################
    results = pool.map_async(work, range(10))
    print(results.get())

    pool.close()
    pool.join()
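
The map and map_async calls above differ in blocking behaviour: map blocks until every task is done, while map_async returns an AsyncResult immediately and get() does the waiting. A short sketch of that, including the timeout form, which raises multiprocessing.TimeoutError when the pool has not finished in time; the worker here is illustrative:

import time
from multiprocessing import Pool, TimeoutError

def slow_work(x):
    time.sleep(1)
    return x * 2

if __name__ == "__main__":
    with Pool(2) as pool:
        async_result = pool.map_async(slow_work, range(4))
        try:
            print(async_result.get(timeout=0.1))
        except TimeoutError:
            print("not done yet, keep waiting")
        print(async_result.get())  # blocks until every task has finished
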
Example #34
0
    def map_async(self, func, args=(), kwargs={}, callback=None):
        results = NativePool.map_async(
            self, MultiprocessingLogExceptions(func),
            args, kwargs, callback)
        self.results.extend(results)
        return results
Example #35
0
        return self.__getitem__(item)

    def __len__(self):
        return len(self.img)


if __name__ == "__main__":
    from tqdm import trange
    from multiprocessing.pool import Pool
    data = SISTLine("/home/ziheng/indoorDist_new", None, "train")
    # os.makedirs("/home/ziheng/heatmaps")
    pool = Pool(20)
    cnt = 0

    def readnsave(i):
        batch = data[i]
        hm = batch["heatmap"].numpy()
        np.save(f"/home/ziheng/heatmaps/{i}.npy", hm)

    def juncsave(i):
        batch = data[i]
        hm = batch["heatmap"].numpy()
        junc = batch["heatmap"].numpy()
        np.save(f"/home/ziheng/heatmaps/{i}.npy", hm)

    readnsave.cnt = 0
    # for i in trange(len(data)):
    pool.map_async(readnsave, range(len(data)))
    pool.close()
    pool.join()
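
A caveat about the pattern above: readnsave is defined inside the if __name__ == "__main__" block, which only works with the 'fork' start method; under 'spawn' (the default on Windows and, since Python 3.8, macOS) the child process re-imports the main module, cannot find the function, and pickling fails. A minimal sketch of the portable layout, with the body reduced to a placeholder:

from multiprocessing import Pool

def readnsave(i):  # module level, so child processes can import it under 'spawn'
    # placeholder: load item i and save its heatmap, as in the example above
    return i

if __name__ == "__main__":
    with Pool(20) as pool:
        pool.map_async(readnsave, range(100)).wait()
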
Example #36
0
                           bucket,
                           latest_dir + filename,
                           ExtraArgs={
                               'ContentType': CT,
                               'ACL': "public-read",
                               'CacheControl': 'no-cache'
                           })
        except Exception as error:
            core_fail('Upload Error ' + str(error))


##################
# Test Run Logic #
##################
check_env()
create_dir()
# Run through all the tags
pool = Pool(processes=3)
r = pool.map_async(container_test, tags, callback=update_globals)
r.wait()
report_render()
badge_render()
report_upload()
# Exit based on test results
if report_status == 'PASS':
    print('Tests Passed exiting 0')
    sys.exit(0)
elif report_status == 'FAIL':
    print('Tests Failed exiting 1')
    sys.exit(1)
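
The callback passed to map_async above (update_globals) is invoked exactly once, with the complete list of worker return values, after all tasks have finished; that is what lets it aggregate everything in a single call. A minimal sketch of that behaviour with illustrative names:

from multiprocessing import Pool

def run_test(tag):
    return (tag, "PASS")

def collect(results):  # receives the full [(tag, status), ...] list in one call
    print("all done:", dict(results))

if __name__ == "__main__":
    with Pool(processes=3) as pool:
        r = pool.map_async(run_test, ["latest", "stable", "nightly"], callback=collect)
        r.wait()
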
Example #37
0
def run_commands(commands,
                 processes=None,
                 timeout=None,
                 meta=None,
                 observer=None):
    pool = Pool(processes=processes)
    manager, queue, m = None, None, None
    manager = Manager()
    m = manager.Queue()
    if observer:
        queue = manager.Queue()

    if meta:
        commands = [(i, meta, command, timeout, queue, m)
                    for i, (command, meta) in enumerate(zip(commands, meta))]
    else:
        commands = [(i, meta, command, timeout, queue, m)
                    for i, command in enumerate(commands)]

    with temp_file() as f:
        filename = str(f)

    m_process = Process(target=monitor, args=(filename, m))
    m_process.daemon = True
    m_process.start()

    def clean_exit():
        status("Keyboard interrupt intercepted, shutting down")
        try:
            m_process.terminate()
            m_process.join()
        except Exception:
            status("Monitor process could not be shut down")
            print_exc()

        try:
            pool.terminate()
            pool.join()
        except Exception:
            status("Pool could not be shut down")
            print_exc()

        status("Shutting down potential orphan processes")
        active = set()
        with open(filename) as ref:
            for line in ref:
                parts = line.split(" ")
                if parts[0] == "ADD":
                    active.add(int(parts[1]))
                elif parts[0] == "REM":
                    active.remove(int(parts[1]))

        for pid in active:
            try:
                print("Killing", pid)
                os.killpg(pid,
                          signal.SIGTERM)  # send signal to the process group
            except OSError as e:
                if e.errno != errno.ESRCH:
                    if e.errno == errno.EPERM:
                        os.waitpid(-pid, 0)
                else:
                    raise e
            except Exception:
                print_exc()
                pass

        os.unlink(filename)

        status("Completely shut down")

    r = pool.map_async(worker, commands)
    atexit.register(clean_exit)

    if observer:
        observe(observer, queue, len(commands))

    r.wait()
    status("### DONE ##")
    m.put(Update.SENTINEL)
    m_process.join()
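
One reason Example #37 passes Manager().Queue() objects to the workers: a plain multiprocessing.Queue cannot be pickled into a Pool task (it raises a RuntimeError), whereas a manager-backed queue proxy can be handed over as an ordinary argument. A minimal sketch of that pattern:

from multiprocessing import Pool, Manager

def worker(args):
    i, queue = args          # the queue proxy travels with the task arguments
    queue.put(i * i)

if __name__ == "__main__":
    with Manager() as manager, Pool(4) as pool:
        queue = manager.Queue()
        pool.map_async(worker, [(i, queue) for i in range(8)]).wait()
        while not queue.empty():
            print(queue.get())
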