def run(self):
    pool = ThreadPool(processes=self._worker_number)
    if self._batch > 1:
        pool.imap_unordered(func=self._func_wrap_batch, iterable=iter(self._forever_get_batch, None))
    else:
        pool.imap_unordered(func=self._func_wrap, iterable=iter(self._forever_get, None))
    while not self._stop_event.is_set():
        time.sleep(0.01)
class SiteThreadChecker(SiteChecker):
    pool_size_key = "thread_pool_size"

    def __init__(self, *args, thread_pool_size=1, **kwargs):
        #super(SiteThreadChecker, self).__init__(*args, **kwargs)
        SiteChecker.__init__(self, *args, output_buff_size=thread_pool_size*50, **kwargs)
        self.max_thread = thread_pool_size
        LinkChecker.max_http_connection = self.max_thread
        LinkChecker.max_pool_size = self.max_thread
        #self.pool = multiprocessing.Pool(processes=self.max_thread, maxtasksperchild=1)
        self.pool = ThreadPool(processes=self.max_thread)
        self._set_task_control_max(self.max_thread)
        #self.temp_queue = Queue(self.max_thread * 2)
        #self.temp_queue.put(self.page_list[0])
        #self.pump = OnSiteLinkPump(self.temp_queue, self.page_list)
        #print("init siteThreadChecker finished")

    @staticmethod
    def get_input_parameter(full_link: str, max_page: int, max_level: int, output_queue, pool_size: int):
        temp = SiteChecker.get_input_parameter_base(full_link, max_page, max_level, output_queue)
        temp.update({SiteThreadChecker.pool_size_key: pool_size})
        return temp

    def additional_reset(self):
        if self.pool is not None:
            self.pool.terminate()
            self.pool = ThreadPool(processes=self.max_thread)
            #self.pool = multiprocessing.Pool(processes=self.max_thread, maxtasksperchild=1)

    def additional_clear(self):
        if self.pool is not None:
            self.pool.terminate()

    def stop(self):
        try:
            self.data_source.set_continue_lock(False)
            self.pool.terminate()
        except:
            pass
        super(SiteThreadChecker, self).stop()

    def begin_crawl(self, level=0):
        #while self.can_continue() and self.data_source.can_continue():
        #print("continue to work, page limit:", self.max_page, " max_level: ", self.max_level)
        #target_func = functools.partial(PageChecker.crawl_page, self)
        try:
            self.pool.imap_unordered(PageChecker.crawl_page_for_iter, self.data_source)
            while self.data_source.can_continue():
                time.sleep(0.1)
            #results = [self.pool.apply_async(PageChecker.crawl_page, args=(self, page))
            #           for page in self.data_source.get_next(OnSiteLink.TypeOnSite, ResponseCode.LinkOK)]
            #[p.get() for p in results]
        except Exception as ex:
            #self.stop()
            msg = "begin_crawl() " + str(self.get_site_info())
            ErrorLogger.log_error("SiteThreadChecker", ex, msg)
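# A minimal, runnable sketch (not part of the original project) of the
# iter(callable, sentinel) pattern used in the run() method above:
# imap_unordered keeps pulling work from a blocking getter until the getter
# returns the sentinel (None), which ends the stream.
from multiprocessing.pool import ThreadPool
from queue import Queue

def _iter_feed_sketch():
    q = Queue()
    for item in (1, 2, 3, None):      # None acts as the stop sentinel
        q.put(item)
    pool = ThreadPool(processes=2)
    # iter(q.get, None) calls q.get() repeatedly and stops at the first None
    for result in pool.imap_unordered(lambda x: x * 2, iter(q.get, None)):
        print(result)                 # 2, 4, 6 in arbitrary order
    pool.close()
    pool.join()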
Example #3
def main():
    assert sys.version_info[0] == 3

    moduleset_versions = get_moduleset_versions()

    pool = ThreadPool(20)
    pool_iter = pool.imap_unordered(_fetch_version, moduleset_versions.keys())

    arch_versions = {}
    for i, some_versions in enumerate(pool_iter):
        arch_versions.update(some_versions)

    for name, version in sorted(moduleset_versions.items()):
        arch_name = fix_name(name)
        if arch_name in arch_versions:
            arch_version, arch_url = arch_versions[arch_name]
            arch_version = arch_version.split("+", 1)[0]
            if arch_name == "readline":
                arch_version = ".".join(arch_version.split(".")[:2])
        else:
            arch_version = "???"
            arch_url = ""

        if is_maybe_newer(arch_version, version):
            print("%-30s %-20s %-20s %s" % (
                name, version, arch_version, arch_url))
Example #4
def main(argv):
    """Go Main Go"""
    scenario = int(argv[1])
    sdf = load_scenarios()
    queue = realtime_run(sdf.loc[scenario], scenario)
    pool = ThreadPool()  # defaults to cpu-count
    sz = len(queue)
    failures = 0

    def _run(row):
        """ Run ! """
        wr = WeppRun(row[0], row[1], row[2], scenario)
        return wr.run()

    sts00 = datetime.datetime.now()
    sts0 = datetime.datetime.now()
    for i, res in enumerate(pool.imap_unordered(_run, queue), 1):
        if not res:
            failures += 1
        if failures > 100:
            print("ABORT due to more than 100 failures...")
            sys.exit(10)
        if i > 0 and i % 5000 == 0:
            delta00 = datetime.datetime.now() - sts00
            delta0 = datetime.datetime.now() - sts0
            speed00 = i / delta00.total_seconds()
            speed0 = 5000 / delta0.total_seconds()
            remaining = ((sz - i) / speed00) / 3600.
            sts0 = datetime.datetime.now()
            print((
                '%5.2fh Processed %6s/%6s [inst/tot %.2f/%.2f rps] '
                'remaining: %5.2fh'
                ) % (delta00.total_seconds() / 3600., i, sz,
                     speed0, speed00, remaining)
            )
def test_GIL():
    """tests running of multiple queries in a threadpool"""
    vertices, triangles = triangle_soup(10000, (-5, 5))
    mesh0 = pyopcode.Model(vertices, triangles)
    vertices, triangles = triangle_soup(10000, (-5, 5))
    mesh1 = pyopcode.Model(vertices, triangles)

    col = pyopcode.Collision(mesh0, mesh1)

    identity = np.identity(4).astype(np.float32)

    def transform_generator():
        """generate affine rotation matrices"""
        np.random.seed(42)
        for i in range(100):
            r = np.random.normal(size=(3, 3))
            u, _, v = np.linalg.svd(r)
            r = u.dot(np.eye(*r.shape)).dot(v)
            a = identity.copy()
            a[:3, :3] = r
            yield a

    from multiprocessing.pool import ThreadPool
    pool = ThreadPool(processes=4)
    results = pool.imap_unordered(lambda affine: col.query(affine, identity), transform_generator())

    import time
    start = time.clock()
    for r in results:
        print(len(r))
    print(time.clock() - start)
Example #6
def get_for_genres(genres):
    genres = set(genres)
    playlists = {}
    new_genres = set()

    for page in xrange(5):
        args = []
        for g in genres:
            args.append((g, page))

        try:
            pool = ThreadPool(PROCESSES)
            pfunc = parse_page
            for i, res in enumerate(pool.imap_unordered(pfunc, args)):
                genre, page, pl, found = res
                print "%d/%d" % (i + 1, len(args))
                playlists.update(pl)
                new_genres |= found
                if not pl:
                    genres.remove(genre)
        except Exception as e:
            print e
            return playlists, []
        finally:
            pool.terminate()
            pool.join()

    return playlists, new_genres
def UrlMode(corpus, request_parallelism):
    """Finds Wayback Machine URLs and writes them to disk.

  Args:
    corpus: A corpus.
    request_parallelism: The number of concurrent requests.
  """

    for dataset in datasets:
        print "Finding Wayback Machine URLs for the %s set:" % dataset
        old_urls_filename = "%s/%s_urls.txt" % (corpus, dataset)
        new_urls_filename = "%s/wayback_%s_urls.txt" % (corpus, dataset)

        urls = ReadMultipleUrls(old_urls_filename)

        p = ThreadPool(request_parallelism)
        results = p.imap_unordered(WaybackUrl, urls)

        progress_bar = ProgressBar(len(urls))
        new_urls = []
        for result in results:
            if result:
                new_urls.append(result)

            progress_bar.Increment()

        WriteUrls(new_urls_filename, new_urls)
def collect_stats(args):
  bots_json = swarming_query('bots', '--limit', '10000')
  if 'error' in bots_json:
    return bots_json
  bots = bots_json.get('items', [])
  stats = {
      'bots_count': len(bots),
      'bots': {},
  }
  logging.info('Found %d bots; collecting tasks.', stats['bots_count'])
  pool = ThreadPool(100)
  count = 0

  def func(bot):
    return process_bot(args, bot)

  try:
    for bot_id, bot_results in pool.imap_unordered(func, bots):
      count += 1
      stats['bots'][bot_id] = bot_results
      logging.info('%4d of %4d (%2.0f%%) of bots processed',
                   count, stats['bots_count'],
                   count * 100 / stats['bots_count'])
  except KeyboardInterrupt as e:
    pass
  return stats
Example #9
        class parallel_map(collections.Iterable):

            def __init__(self, pool_size, function, *iterables):
                if not isinstance(pool_size, numbers.Integral):
                    raise TypeError('pool_size must be an integer, not ' +
                                    repr(pool_size))
                elif not callable(function):
                    raise TypeError('function must be callable, not ' +
                                    repr(function))
                elif not iterables:
                    raise TypeError('missing iterable')
                self.pool = ThreadPool(pool_size)
                self.function = function
                self.results = self.pool.imap_unordered(self.map_function,
                                                        zip(*iterables))

            def map_function(self, args):
                try:
                    value = self.function(*args)
                except Exception:
                    return False, sys.exc_info()
                return True, value

            def __iter__(self):
                errors = []
                for success, value in self.results:
                    if success:
                        yield value
                    else:
                        errors.append(value)
                self.pool.close()
                self.pool.join()
                for error in errors:
                    exec('raise error[1], None, error[2]')
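# A minimal, runnable Python 3 sketch (assumed, not the original class) of the
# same idea as parallel_map above: each call is wrapped as a (success, value)
# pair so worker exceptions can be collected and re-raised after the unordered
# results have been drained.
from multiprocessing.pool import ThreadPool

def parallel_map_sketch(pool_size, function, iterable):
    pool = ThreadPool(pool_size)

    def safe_call(arg):
        try:
            return True, function(arg)
        except Exception as exc:      # capture instead of crashing the worker
            return False, exc

    errors = []
    try:
        for ok, value in pool.imap_unordered(safe_call, iterable):
            if ok:
                yield value
            else:
                errors.append(value)
    finally:
        pool.close()
        pool.join()
    if errors:
        raise errors[0]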
Example #10
    def _fit(self, dataset):
        est = self.getOrDefault(self.estimator)
        epm = self.getOrDefault(self.estimatorParamMaps)
        numModels = len(epm)
        eva = self.getOrDefault(self.evaluator)
        tRatio = self.getOrDefault(self.trainRatio)
        seed = self.getOrDefault(self.seed)
        randCol = self.uid + "_rand"
        df = dataset.select("*", rand(seed).alias(randCol))
        condition = (df[randCol] >= tRatio)
        validation = df.filter(condition).cache()
        train = df.filter(~condition).cache()

        subModels = None
        collectSubModelsParam = self.getCollectSubModels()
        if collectSubModelsParam:
            subModels = [None for i in range(numModels)]

        tasks = _parallelFitTasks(est, train, eva, validation, epm, collectSubModelsParam)
        pool = ThreadPool(processes=min(self.getParallelism(), numModels))
        metrics = [None] * numModels
        for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks):
            metrics[j] = metric
            if collectSubModelsParam:
                subModels[j] = subModel

        train.unpersist()
        validation.unpersist()

        if eva.isLargerBetter():
            bestIndex = np.argmax(metrics)
        else:
            bestIndex = np.argmin(metrics)
        bestModel = est.fit(dataset, epm[bestIndex])
        return self._copyValues(TrainValidationSplitModel(bestModel, metrics, subModels))
def get_first_result_from_threads(calls):
    calls = list(enumerate(calls))

    def run_func(call):
        i, call = call
        func = call[0]
        args = call[1] if len(call)>1 else []
        kwargs = call[2] if len(call)>2 else {}
        try:
            return i, func(*args, **kwargs)
        except Exception as e:
            return i, e

    pool = ThreadPool(processes=len(calls))
    result = pool.imap_unordered(run_func, calls).next()

    for thread in pool._pool:
        # via http://stackoverflow.com/a/15274929
        if not thread.isAlive():
            continue
        exc = ctypes.py_object(SystemExit)
        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
            ctypes.c_long(thread.ident), exc)
        if res == 0:
            raise ValueError("nonexistent thread id")
        elif res > 1:
            # """if it returns a number greater than one, you're in trouble,
            # and you should call it again with exc=NULL to revert the effect"""
            ctypes.pythonapi.PyThreadState_SetAsyncExc(thread.ident, None)
            raise SystemError("PyThreadState_SetAsyncExc failed")

    return result
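# Hedged usage sketch for get_first_result_from_threads (the (func, args,
# kwargs) tuple format is taken from the code above; 'slow' is a made-up
# helper). Shown as comments because the snippet itself targets Python 2.
# import time
# def slow(seconds):
#     time.sleep(seconds)
#     return seconds
# index, value = get_first_result_from_threads([
#     (slow, [2.0]),
#     (slow, [0.1]),
# ])
# # -> (1, 0.1): the faster call wins, the slower thread is asked to exit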
Example #12
def convert2(names, name_func, nprocs=4):
    pool = ThreadPool(processes=nprocs)
    def converter(in_name):
        out_name = name_func(in_name)
        check_call(['convert', in_name, out_name])
        return out_name

    return pool.imap_unordered(converter, names)
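# Hedged usage sketch for convert2 (assumes ImageMagick's 'convert' CLI is on
# PATH and that check_call comes from subprocess; the file names are made up):
# jpgs = ['photo1.jpg', 'photo2.jpg']
# out_names = convert2(jpgs, lambda name: name.rsplit('.', 1)[0] + '.png')
# for out_name in out_names:   # conversions run as the iterator is consumed
#     print('wrote', out_name)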
Example #13
def process_threaded(img, filters, threadn = 8):
    accum = np.zeros_like(img)
    def f(kern):
        return cv2.filter2D(img, cv2.CV_8UC3, kern)
    pool = ThreadPool(processes=threadn)
    for fimg in pool.imap_unordered(f, filters):
        np.maximum(accum, fimg, accum)
    return accum
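# Hedged usage sketch for process_threaded above (assumes OpenCV and NumPy;
# the filter bank is illustrative, built with cv2.getGaborKernel):
# import numpy as np
# import cv2
# img = cv2.imread('input.png')
# filters = [cv2.getGaborKernel((31, 31), 4.0, theta, 10.0, 0.5, 0, ktype=cv2.CV_32F)
#            for theta in np.arange(0, np.pi, np.pi / 8)]
# cv2.imwrite('gabor_response.png', process_threaded(img, filters))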
 def run_jobs(self, f, jobs):
     if self.usecloud:
         jids = cloud.map(f, jobs, _env=self.cloud_env, _profile=True, _depends_on=self.preprocess_job)
         ires = cloud.iresult(jids)
     else:
         pool = ThreadPool(processes=cv2.getNumberOfCPUs())
         ires = pool.imap_unordered(f, jobs)
     return ires
def main(api_url, api_access_token):
    client = dmapiclient.DataAPIClient(api_url, api_access_token)
    pool = ThreadPool(10)
    count = 1
    for i in pool.imap_unordered(update(client), enumerate(client.find_services_iter())):
        count += i
        if count % 1000 == 0:
            print("** {}".format(count))
def check_migration(client, stage, framework_slug, draft_bucket, documents_bucket):
    do_check_draft_and_service = functools.partial(check_draft_and_service, client)
    pool = ThreadPool(10)
    drafts = pool.imap_unordered(
        do_check_draft_and_service,
        find_drafts(client, framework_slug))
    for draft in drafts:
        pass
Example #18
def f(n, num_processes=None):
    if num_processes is None:
        num_processes = mp.cpu_count() - 1
    base_index = len(kwrds)
    tp = ThreadPool(num_processes)

    im = tp.imap_unordered(f_mp, range(base_index, n + 1))
    return sum(v for v in im if v is not None)
Example #19
def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
 def download(self):
     tp = ThreadPool(100)
     result = tp.imap_unordered(self.__worker, self.items)
     for item in result:
         ok, book_output_dir = item
         if not ok:
             logger = Log()
             logger.write_error("Intro: " + book_output_dir + '\n')
     tp.terminate()
    def _threaded_read(self):
        elements = [idx for idx in range(1, len(self.annotation_db))]
        pool = ThreadPool(processes=4)

        with tqdm.tqdm(total=len(elements), disable=not is_master()) as pbar:
            for i, _ in enumerate(pool.imap_unordered(self._fill_cache, elements)):
                if i % 100 == 0:
                    pbar.update(100)
        pool.close()
def maybe_convert_to_wav(base_dir):
    roots = list(os.walk(base_dir))
    print("Converting and joining source audio files...")
    bar = progressbar.ProgressBar(max_value=len(roots), widgets=SIMPLE_BAR)
    tp = ThreadPool()
    for _ in bar(tp.imap_unordered(maybe_convert_one_to_wav, roots)):
        pass
    tp.close()
    tp.join()
Example #23
def main(api_url, api_access_token):
    client = dmapiclient.DataAPIClient(api_url, api_access_token)
    pool = ThreadPool(10)
    count = 1
    for i in pool.imap_unordered(update(client),
                                 enumerate(client.find_services_iter())):
        count += i
        if count % 1000 == 0:
            print("** {}".format(count))
def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    print("Running {} games using {}:".format(len(matches), heuristic_type))
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
Example #25
def _run_matches(matches, name, num_processes=NUM_PROCS):
    results = []
    pool = Pool(num_processes)
    print("Running {} games:".format(len(matches)))
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
    print()
    return results
Example #26
    def initialize(self):
        """Initialize the ARP spoofer
        """
        self.victim = (self.config['to_ip'].value, getmacbyip(self.config['to_ip'].value))

        if self.config['from_ip'].value is None:
            # Enumerate all IPs in network
            Msg("Gathering information on network...this may take a minute")
            thread_pool = ThreadPool(processes=25)
            ip_whitelist = {self.victim[0], self.local[0]}
            for ip, mac in thread_pool.imap_unordered(arp.get_mac_address_for_ip,
                                                      (ip for ip in self.enumerate_all_ips_in_network(
                                                              self.config['to_ip'].value, self.get_iface_netmask()))):
                if ip in ip_whitelist:
                    continue

                if mac is None or mac == "ff:ff:ff:ff:ff:ff":
                    # no mac for you, next!
                    continue

                self.targets[ip] = mac
                # todo Consider adding an upper limit on hosts being poisoned
        elif "/" in self.config['from_ip'].value:
            source_ip, netmask = self.cidr_to_ip_and_netmask(self.config['from_ip'].value)
            # Enumerate all IPs in network
            Msg("Gathering information on network...this may take a minute")
            thread_pool = ThreadPool(processes=25)
            ip_whitelist = {self.victim[0], self.local[0]}
            for ip, mac in thread_pool.imap_unordered(arp.get_mac_address_for_ip, (ip for ip in
                                                                                   self.enumerate_all_ips_in_network(
                                                                                           source_ip, netmask))):
                if ip in ip_whitelist:
                    continue

                if mac is None or mac == "ff:ff:ff:ff:ff:ff":
                    # no mac for you, next!
                    continue

                self.targets[ip] = mac
                # todo Consider adding an upper limit on hosts being poisoned
        else:
            self.targets[self.config['from_ip'].value] = getmacbyip(self.config['from_ip'].value)
        Msg("Initializing ARP poison...")
        return self.initialize_post_spoof()
Example #27
    def _run_ddls_with_invalidation(self, db, sync_ddl=False):
        """Test INVALIDATE METADATA with concurrent DDLs to see if any queries hang"""
        test_self = self

        class ThreadLocalClient(threading.local):
            def __init__(self):
                self.client = test_self.create_impala_client()
                if sync_ddl:
                    self.client.set_configuration_option('sync_ddl', 'true')

        pool = ThreadPool(processes=8)
        tls = ThreadLocalClient()

        def run_ddls(i):
            tbl_name = db + ".test_" + str(i)
            for query_tmpl in [
                    # Create a partitioned and unpartitioned table
                    "create table %s (i int)",
                    "create table %s_part (i int) partitioned by (j int)",
                    # Below queries could fail if running with invalidate metadata concurrently
                    "alter table %s_part add partition (j=1)",
                    "alter table %s_part add partition (j=2)",
                    "invalidate metadata %s_part",
                    "refresh %s",
                    "refresh %s_part",
                    "insert overwrite table %s select int_col from functional.alltypestiny",
                    "insert overwrite table %s_part partition(j=1) values (1), (2), (3), (4), (5)",
                    "insert overwrite table %s_part partition(j=2) values (1), (2), (3), (4), (5)"
            ]:
                try:
                    query = query_tmpl % tbl_name
                    # TODO(IMPALA-9123): Timeout logic here does not work for DDLs since they are
                    #  usually stuck in CREATED state and execute_async() won't return. We finally
                    #  use timeout in pytest.mark.timeout() but it's not precise. We should find a
                    #  more elegant way to detect timeout of DDLs.
                    handle = tls.client.execute_async(query)
                    is_finished = tls.client.wait_for_finished_timeout(
                        handle, timeout=60)
                    assert is_finished, "Query timeout(60s): " + query
                    tls.client.close_query(handle)
                except ImpalaBeeswaxException as e:
                    # Could raise exception when running with INVALIDATE METADATA
                    assert TestConcurrentDdls.is_acceptable_error(
                        str(e), sync_ddl), str(e)
            # TODO(IMPALA-9123): Detect hangs here instead of using pytest.mark.timeout()
            self.execute_query_expect_success(tls.client,
                                              "invalidate metadata")

        # Run DDLs in single thread first. Some bugs causing DDL hangs can be hidden when run
        # with concurrent DDLs.
        run_ddls(0)

        # Run DDLs with invalidate metadata in parallel
        NUM_ITERS = 16
        for i in pool.imap_unordered(run_ddls, xrange(1, NUM_ITERS + 1)):
            pass
Example #28
def main(compilation_db_path, source_files, verbose, formatter, iwyu_args):
    """ Entry point. """
    # Canonicalize compilation database path
    if os.path.isdir(compilation_db_path):
        compilation_db_path = os.path.join(compilation_db_path,
                                           'compile_commands.json')

    compilation_db_path = os.path.realpath(compilation_db_path)
    if not os.path.isfile(compilation_db_path):
        print('ERROR: No such file or directory: \'%s\'' % compilation_db_path)
        return 1

    # Read compilation db from disk
    with open(compilation_db_path, 'r') as fileobj:
        compilation_db = json.load(fileobj)

    # expand symlinks
    for entry in compilation_db:
        entry['file'] = os.path.realpath(entry['file'])

    # Cross-reference source files with compilation database
    source_files = [os.path.realpath(s) for s in source_files]
    if not source_files:
        # No source files specified, analyze entire compilation database
        entries = compilation_db
    else:
        # Source files specified, analyze the ones appearing in compilation db,
        # warn for the rest.
        entries = []
        for source in source_files:
            matches = [e for e in compilation_db if e['file'] == source]
            if matches:
                entries.extend(matches)
            else:
                print('WARNING: \'%s\' not found in compilation database.' %
                      source)

    # Run analysis
    def run_iwyu_task(entry):
        cwd, compile_command = entry['directory'], entry['command']
        compile_command = workaround_parent_dir_relative_includes(
            cwd, compile_command)
        return run_iwyu(cwd, compile_command, iwyu_args, verbose)
    pool = ThreadPool(multiprocessing.cpu_count())
    try:
        for iwyu_output in pool.imap_unordered(run_iwyu_task, entries):
            formatter(iwyu_output)
    except KeyboardInterrupt as ki:
        sys.exit(1)
    except OSError as why:
        print('ERROR: Failed to launch include-what-you-use: %s' % why)
        return 1
    finally:
        pool.terminate()
        pool.join()
    return 0
Example #30
def fetch_input_sizes(args, slurm_jobs):
    data_path = os.path.join(args.cache_folder, 'speed_data_sizes.csv')
    try:
        with open(data_path) as f:
            reader = DictReader(f)
            cache = {int(row['run_id']): float(row['MB'])
                     for row in reader}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        cache = {}
    session = KiveAPI(args.kive_server)
    session.login(args.kive_user, args.kive_password)
    fetcher = partial(fetch_input_size, cache=cache, kive_session=session)
    pool = ThreadPool()
    job_count = len(slurm_jobs)
    fetch_count = 0
    failed_run_ids = set()
    last_error = None
    data_file = None
    data_writer = None
    input_sizes = {}
    try:
        for i, (run_id, input_size, is_cached, error_message) in enumerate(
                pool.imap_unordered(fetcher, slurm_jobs, chunksize=10)):
            if error_message is not None:
                last_error = error_message
                failed_run_ids.add(run_id)
            if not is_cached:
                if data_file is None:
                    data_file = open(data_path, 'w')
                    data_writer = DictWriter(data_file, ['run_id', 'MB'])
                    data_writer.writeheader()
                    for old_run_id, old_input_size in input_sizes.items():
                        data_writer.writerow({'run_id': old_run_id,
                                              'MB': old_input_size})
                if fetch_count % 10000 == 0:
                    print('Fetched {} runs after scanning {} of {} at {}.'.format(
                        fetch_count,
                        i,
                        job_count,
                        datetime.now()))
                fetch_count += 1
            input_sizes[run_id] = input_size
            if data_writer:
                data_writer.writerow({'run_id': run_id, 'MB': input_size})
    finally:
        if data_file is not None:
            data_file.close()

    if failed_run_ids:
        message = 'Failed to fetch run ids: {}\n  Caused by {}'.format(
            ', '.join(sorted(failed_run_ids)),
            last_error)
        raise RuntimeError(message)
    return input_sizes
Example #31
def main():
    forkme.fork(4)
    pool = ThreadPool(8)

    engine = wait_engine()
    Session.configure(bind=engine)
    Base.metadata.create_all(engine)

    for _ in pool.imap_unordered(data_generator, range(100)):
        pass
Example #32
    def update(self):
        self._db_dir_check_existence()

        new_hashes = self._get_online_hashes()
        old_hashes = self._get_local_hashes()

        old_to_delete = set(old_hashes) - set(new_hashes)
        for old_hash in old_to_delete:
            os.remove(old_hashes[old_hash])

        new_to_download = set(new_hashes) - set(old_hashes)
        # for new_hash in new_to_download:
        #     self._download_db_file(new_hashes[new_hash])
        p = ThreadPool(20)
        p.imap_unordered(self._download_db_file,
                         (new_hashes[new_hash]
                          for new_hash in new_to_download))
        p.close()
        p.join()
Example #33
 def poolList(self, method, items):
     results = []
     if ENABLE_POOL and not DEBUG:
         pool = ThreadPool(CORES)
         results = pool.imap_unordered(method, items, chunksize=25)
         pool.close()
         pool.join()
     else: results = [method(item) for item in items]
     results = filter(None, results)
     return results
Example #34
def setcaches(urls, section=''):
    def _setcache(url):
        setcache(url, section)

    pool = ThreadPool(8)
    res = pool.imap_unordered(_setcache, urls)
    for _ in res:
        pass
    pool.close()
    pool.join()
Example #35
def poolList(method, items):
    results = []
    if ENABLE_POOL:
        pool = ThreadPool(cpu_count())
        results = pool.imap_unordered(method, items)
        pool.close()
        pool.join()
    else: results = [method(item) for item in items]
    results = filter(None, results)
    return results
Example #36
def fetch_all_csvs():
    pool = ThreadPool(4)
    results = pool.imap_unordered(fetch_csv, csv_urls)
    pool.close()
    pool.join()

    strings = [result.decode("utf-8") for result in results]
    csv_file = StringIO(''.join(strings))

    return csv_file
Example #37
    def _get_online_hashes(self):
        cur_year = datetime.datetime.now().year
        hashes = {}

        # enclosed
        def get_online_hash(year):
            r = requests.get(get_db_address(year, 'meta'))
            sha256hash = r.text.split()[-1].split(':')[-1].upper()
            hashes[sha256hash] = get_db_address(year)

        try:
            p = ThreadPool(20)
            p.imap_unordered(get_online_hash,
                             range(self.first_year, cur_year + 1))
            p.close()
            p.join()
        except requests.exceptions.RequestException as e:
            logging.error(str(e))
            return {}  # couldn't retrieve all hashes
        return hashes
Example #38
 def process_downloads(self):
     threads = len(self.url_list)
     pool = ThreadPool(threads)
     results = pool.imap_unordered(self.download_url, self.url_list)
     for index, response in results:
         if response != '':
             self.playlists_all[index].update(response)
         else:
             self.playlists_all[index]['user_info'] = []
     pool.terminate()
     self.buildPlaylistList()
Example #39
def main(args):  # pragma: no cover
    opts = parse_args(args)

    res = defaultdict(list)
    builders = get_builders()
    workers = ThreadPool(processes=opts.jobs)
    results = workers.imap_unordered(process_entry, builders)
    for result in results:
        res[result[0]].append(result[1])

    print json.dumps(res, sort_keys=True, indent=2, separators=(',', ': '))
Example #40
def test_magic_find_thread_safe(magic):
    pool = ThreadPool(32)

    m = Magic()

    assert m
    assert magic.check(find_db())

    magic.load(find_db())

    data = (b'\xcf\xfa\xed\xfe\x07\x00\x00\x01\x03\x00\x00\x00\x02\x00\x00\x00'
            b'\x12\x00\x00\x000\x07\x00\x00\x85\x00 \x00\x00\x00\x00\x00\x19')

    for result in pool.imap_unordered(magic.guess_bytes, [data] * 32):
        assert 'Mach-O 64-bit x86_64 executable' in result

    magic.set_flags(mime_type=True)

    for result in pool.imap_unordered(magic.guess_bytes, [data] * 32):
        assert 'application/x-mach-binary' in result
Example #41
def main(args):  # pragma: no cover
  opts = parse_args(args)

  res = defaultdict(list)
  builders = get_builders()
  workers = ThreadPool(processes=opts.jobs)
  results = workers.imap_unordered(process_entry, builders)
  for result in results:
    res[result[0]].append(result[1])

  print json.dumps(res, sort_keys=True, indent=2, separators=(',', ': '))
Example #42
    def wrapper(args_list):
        results = []
        pool = ThreadPool()
        with tqdm_redirect_std() as orig_stdout:
            for result in tqdm(pool.imap_unordered(func, args_list), total=len(args_list),
                               file=orig_stdout, dynamic_ncols=True):
                results.append(result)
        pool.close()
        pool.join()

        return results
Example #43
def main():
    sql = 'select p.id, p.title FROM `resolved_papers` p inner join `resolved_papers_title` pt on pt.Id = p.Id WHERE downloaded = 0 and pt.`title_language` = "en" and p.id >= 38304;'
    papers = pd.read_sql(sql, con=db)
    ids = list(zip(*[papers[c].values.tolist() for c in papers]))

    pool = ThreadPool()

    print(pool.imap_unordered(_download, ids))
    pool.close()

    pool.join()
def process_threaded(img, filters, threadn = 8):

    def f(kern):
        return cv2.matchTemplate(img, kern, cv.CV_TM_CCORR_NORMED)
    pool = ThreadPool(processes=threadn)
    accum = None
    for fimg in pool.imap_unordered(f, filters):
        if accum is None:
            accum = np.zeros_like(fimg)
        accum += fimg * fimg
    return accum
Example #45
    def _fit(self, dataset):
        est = self.getOrDefault(self.estimator)
        epm = self.getOrDefault(self.estimatorParamMaps)
        numModels = len(epm)
        eva = self.getOrDefault(self.evaluator)
        nFolds = self.getOrDefault(self.numFolds)
        seed = self.getOrDefault(self.seed)
        h = 1.0 / nFolds
        randCol = self.uid + "_rand"
        df = dataset.select("*", F.rand(seed).alias(randCol))
        metrics = np.zeros((numModels, nFolds))

        pool = ThreadPool(processes=min(self.getParallelism(), numModels))
        subModels = None
        collectSubModelsParam = self.getCollectSubModels()
        if collectSubModelsParam:
            subModels = [[None for j in range(numModels)]
                         for i in range(nFolds)]

        for i in range(nFolds):
            if self.sequentialIndex:
                pass
                # todo pass a column name to base the split on. make sure the split conforms to sklearn norms.
                # idx = [1,2,3,4]
                # training.where(~col("id").isin(idx)).show()
            else:
                validateLB = i * h
                validateUB = (i + 1) * h
                condition = (df[randCol] >= validateLB) & (df[randCol] <
                                                           validateUB)
                validation = df.filter(condition).cache()
                train = df.filter(~condition).cache()

            tasks = self._parallelFitTasks(est, train, eva, validation, epm,
                                           collectSubModelsParam)
            for j, metric, subModel in pool.imap_unordered(
                    lambda f: f(), tasks):
                metrics[j, i] = metric
                if collectSubModelsParam:
                    subModels[i][j] = subModel

            validation.unpersist()
            train.unpersist()

        avgMetrics = np.mean(metrics, axis=1)

        if eva.isLargerBetter():
            bestIndex = np.argmax(avgMetrics)
        else:
            bestIndex = np.argmin(avgMetrics)
        bestModel = est.fit(dataset, epm[bestIndex])
        return self._copyValues(
            CrossValidatorModel(bestModel, avgMetrics.tolist(),
                                subModels)), metrics
Example #46
def imap_unordered_bar(func, args, n_processes=4):
    p = ThreadPool(n_processes)
    res_list = []
    with tqdm(total=len(args)) as pbar:
        for i, res in enumerate(p.imap_unordered(func, args)):
            pbar.update()
            res_list.append(res)
    pbar.close()
    p.close()
    p.join()
    return res_list
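# Hedged usage sketch for imap_unordered_bar (assumes tqdm is installed; the
# worker function is made up):
# import time
# def work(x):
#     time.sleep(0.05)
#     return x * x
# squares = imap_unordered_bar(work, list(range(50)), n_processes=4)
# print(sorted(squares))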
def thread_pool():
    print("thread pool")
    import threading
    progress_thread = threading.Thread()
    progress_thread.daemon = True
    progress_thread.start()

    pool = ThreadPool(num_threads)
    it = pool.imap_unordered(func, args, chunksize=2)
    for s in it:
        print(s)
def each(coll, iter):
    if not len(coll):
        return [None, None]
    pool = Pool(len(coll))
    try:
        for res in pool.imap_unordered(iter, coll):
            print(res)
            pass
    except Exception as e:
        return [None, e]
    return [None, None]
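# Hedged usage sketch for each() above ('Pool' is assumed to be
# multiprocessing.pool.Pool or ThreadPool; the second element of the returned
# pair carries any exception raised by the callback):
# _, err = each([1, 2, 3], lambda x: x * 2)
# if err is not None:
#     print('failed:', err)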
Example #49
def titles():
    sql = 'select id, title from resolved_papers;'
    papers = pd.read_sql(sql, con=db)
    ids = list(zip(*[papers[c].values.tolist() for c in papers]))

    pool = ThreadPool()

    print(pool.imap_unordered(_titlesLang, ids))
    pool.close()

    pool.join()
Example #50
def main(url_file, pool_size):
    urls = load_urls(url_file)
    click.echo('URLs to collect:')
    click.echo('\n'.join(urls))
    click.echo('\nDownloading sites')
    pool = Pool(pool_size)
    with click.progressbar(pool.imap_unordered(parse_page, urls),
            length=len(urls)) as results:
#    with click.progressbar((parse_page(url) for url in urls),
#            length=len(urls)) as results:
        result = [r for r in results]
    click.echo('\n'.join(result))
def threaded_proc(vcf_files,cnv_params,reads,study,blood):
    pool = ThreadPool(48)
    moi_data = defaultdict(dict)
    task_list = [(x,cnv_params,str(reads),'threaded',study,blood) for x in vcf_files]
    try:
        moi_data = {vcf : data for vcf, data in pool.imap_unordered(arg_star, task_list)}
    except Exception:
        pool.close()
        pool.join()
        raise
    return moi_data
 def run_jobs(self, f, jobs):
     if self.usecloud:
         jids = cloud.map(f,
                          jobs,
                          _env=self.cloud_env,
                          _profile=True,
                          _depends_on=self.preprocess_job)
         ires = cloud.iresult(jids)
     else:
         pool = ThreadPool(processes=cv2.getNumberOfCPUs())
         ires = pool.imap_unordered(f, jobs)
     return ires
 def run_farm(self):
     try:
         self._start_sending_feedback()
         input_t = threading.Thread(target=self._sample_data)
         input_t.start()  # start sampling data
         self._progress_logger.start()
         self._db_buffer.start_input_output_cycle()  # start input and output data to/from file
         pool = ThreadPool(processes=self._max_worker)
         # pool.imap_unordered(self._check_whois_with_dns, self._db_buffer, chunksize=1)
         pool.imap_unordered(self._check_whois_with_dns, iter(self.sample_gen, None), chunksize=1)
         while not self._stop_event.is_set() and not self._internal_stop_event.is_set():
             time.sleep(1)
         if self._stop_event.is_set():
             self._internal_stop_event.set()
         input_t.join()
         self._progress_logger.join()
         self._db_buffer.terminate()
         if self._stop_event.is_set():
             self._finished = True
         self._end_sending_feedback()
     except Exception as ex:
         if self._stop_event.is_set():
             self._finished = True
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex, "run_farm() index at:" + str(self._job_done))
Example #54
    def _total_samples(self, cls, feature, _ids):
        pool = ThreadPool(cpu_count())

        feature_filter = self.feature_filter

        def x(_id):
            f = feature(_id=_id, persistence=cls)
            filtered = feature_filter(f)
            return len(filtered)

        if self.parallel:
            total_samples = sum(pool.imap_unordered(x, _ids))
        else:
            total_samples = sum(map(x, _ids))
        return total_samples
Example #55
def derive_stats(args, begin_date, init_stats=None):
  """Process raw CQ updates log and derive stats.

  Fetches raw CQ events and returns the same format as organize_stats().
  If ``init_stats`` are given, preserve the jobs stats and replace the
  other stats.
  """
  stats = init_stats or default_stats()
  filters = ['project=%s' % args.project, 'action=patch_stop']
  end_date = begin_date + datetime.timedelta(minutes=INTERVALS[args.range])
  results = fetch_cq_logs(begin_date, end_date, filters=filters)
  if not results:
    return stats

  stats['begin'] = date_from_timestamp(results[-1]['timestamp'])
  stats['end'] = date_from_timestamp(results[0]['timestamp'])

  raw_patches = set()
  for reason in results:
    raw_patches.add((reason['fields']['issue'], reason['fields']['patchset']))

  patch_stats = {}
  # Fetch and process each patchset log
  def get_patch_stats(patch_id):
    return derive_patch_stats(args, begin_date, end_date, patch_id)

  if args.seq or not args.thread_pool:
    iterable = map(get_patch_stats, raw_patches)
  else:
    pool = ThreadPool(min(args.thread_pool, len(raw_patches)))
    iterable = pool.imap_unordered(get_patch_stats, raw_patches)

  patches, issues = set(), set()
  for patch_id, pstats in iterable:
    if not pstats['supported']:
      continue
    patch_stats[patch_id] = pstats

    issue, patchset = patch_id
    issues.add(issue)
    patches.add((issue, patchset))

  stats['issue-count'] = len(issues)
  stats['patchset-count'] = len(patches)

  stats['patch_stats'] = patch_stats
  _derive_stats_from_patch_stats(stats)
  return stats
Example #56
def parse_playlists(pl_dict):
    result = {}
    try:
        pool = ThreadPool(PROCESSES)
        pfunc = parse_playlist
        args = pl_dict.keys()
        for i, (uri, streams) in enumerate(pool.imap_unordered(pfunc, args)):
            print "%d/%d" % (i + 1, len(args))
            result[uri] = (pl_dict[uri], streams)
    except Exception as e:
        print e
        return {}
    finally:
        pool.terminate()
        pool.join()

    return result
Example #57
def fetch_urls(url_iterator, download_root=None, concurrency=2, chunk_size=DEFAULT_CHUNK_SIZE):
    pool = ThreadPool(processes=concurrency)

    if download_root is not None:
        iterable = ((i, os.path.join(download_root, j)) for i, j in url_iterator)
    else:
        iterable = ((i, None) for i, j in url_iterator)

    for i in pool.imap_unordered(safe_retrieve_file, iterable):
        if not i:
            continue

        status_code, elapsed_time, url, local_filename = i

        logging.info('HTTP %d (%0.2fs) %s', status_code, elapsed_time, url)

    pool.close()
    pool.join()
def DownloadMode(corpus, request_parallelism):
  """Downloads the URLs for the specified corpus.

  Args:
    corpus: A corpus.
    request_parallelism: The number of concurrent download requests.
  """

  missing_urls = []

  for dataset in datasets:
    print 'Downloading URLs for the %s set:' % dataset

    urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
    urls = ReadUrls(urls_filename)

    missing_urls_filename = '%s/missing_urls.txt' % corpus
    if os.path.exists(missing_urls_filename):
      print 'Only downloading missing URLs'
      urls = list(set(urls).intersection(ReadUrls(missing_urls_filename)))

    p = ThreadPool(request_parallelism)
    results = p.imap_unordered(DownloadMapper, izip(urls, repeat(corpus)))

    progress_bar = ProgressBar(len(urls))

    collected_urls = []
    try:
      for url, story_html in results:
        if story_html:
          collected_urls.append(url)

        progress_bar.Increment()
    except KeyboardInterrupt:
        print('Interrupted by user')
        missing_urls.extend(set(urls) - set(collected_urls))


  WriteUrls('%s/missing_urls.txt' % corpus, missing_urls)

  if missing_urls:
    print ('%d URLs couldn\'t be downloaded, see %s/missing_urls.txt.'
           % (len(missing_urls), corpus))
    print 'Try and run the command again to download the missing URLs.'
def main():
  print 'Usage: git log | %s\n\n' % sys.argv[0]
  print 'Counting (press CTRL+C to stop)'
  pool = ThreadPool(250)
  count = 0
  stats = {}
  try:
    for x in pool.imap_unordered(CheckSafe, ExtractCRFromStdin()):
      count += 1
      stats.setdefault(x, 0)
      stats[x] += 1
      stats_str = ''
      for k, v in sorted(stats.items()):
        fmt = '%s: %d (%.2f %%)  ' % (k, v, v * 100.0 / count)
        stats_str += '%-25s' % fmt
      print '\r[%d] %s' % (count, stats_str),
      sys.stdout.flush()
  except KeyboardInterrupt as e:
    pass
  print '\n\n'