Example #1
    def collect(self):
        start = default_timer()

        executor = Executor(max_workers=len(plugins))
        futures = {
            executor.submit(plugin.run): name
            for name, plugin in plugins.items()
        }
        try:
            for future in concurrent.futures.as_completed(
                    futures, timeout=cfg.scrape_timeout_seconds):
                fn_name = futures[future]
                try:
                    future.result()
                except Exception:
                    exceptions.labels(fn_name).inc()
                    log.error("plugin %r generated an exception", fn_name)
                else:
                    duration = max(default_timer() - start, 0)
                    scrape_plugin_duration_seconds.labels(fn_name).set(
                        duration)
                    log.debug("%s finished in %.02fs", fn_name, duration)
        except concurrent.futures.TimeoutError:
            for future, fn_name in futures.items():
                if not future.done():
                    timeouts.labels(fn_name).inc()
                    log.error("plugin %r timeout exceeded", fn_name)

        log.info("Scrape complete")
        return []
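The pattern above keeps a future-to-name mapping so that, when as_completed times out, the leftover entries in futures.items() can still be reported by name. A minimal, self-contained sketch of that idea (the task names and timings below are made up):

import concurrent.futures
import time

def work(seconds):
    time.sleep(seconds)
    return seconds

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    # future -> task name, same shape as the plugin example above
    futures = {executor.submit(work, s): name
               for name, s in {"fast": 0.1, "slow": 2.0}.items()}
    try:
        for future in concurrent.futures.as_completed(futures, timeout=0.5):
            print(futures[future], "finished with", future.result())
    except concurrent.futures.TimeoutError:
        # futures.items() names the tasks that never completed in time
        for future, name in futures.items():
            if not future.done():
                print(name, "timed out")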
Example #2
async def select_now(cases: dict):
    futures = {
        key: asyncio.ensure_future(t)
        for key, t in cases.items()
    }
    completed = next(((k, f) for k, f in futures.items() if f.done()), (None, None))
    return completed
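A hedged usage sketch for a helper like the one above; note that freshly created tasks are only marked done after the event loop has had a chance to run them, hence the short sleep (the case names are illustrative):

import asyncio

async def main():
    cases = {"instant": asyncio.sleep(0), "slow": asyncio.sleep(10)}
    futures = {key: asyncio.ensure_future(t) for key, t in cases.items()}
    await asyncio.sleep(0.1)  # let the loop schedule the tasks
    key, fut = next(((k, f) for k, f in futures.items() if f.done()),
                    (None, None))
    print("first completed case:", key)
    for f in futures.values():  # tidy up the still-pending task
        f.cancel()

asyncio.run(main())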
Example #3
def main(input_dir, output_dir, agency_file):
    PA_lex = get_PA_lexicon(agency_file)
    analyser = SentimentIntensityAnalyzer()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if os.path.isfile(input_dir):
        result = analyize_script(PA_lex, analyser, input_dir)
        if result is not None:
            result.to_csv(
                os.path.join(
                    output_dir, '%s.csv' %
                    os.path.splitext(os.path.basename(input_dir))[0]))
        return
    # must be directory
    with concurrent.futures.ProcessPoolExecutor(max_workers=6) as executor:
        futures = {}
        for jsonfile in [
                x for x in os.listdir(input_dir) if x.endswith('.json')
        ]:
            print('Analyzing %s' % jsonfile)
            future = executor.submit(analyize_script, PA_lex, analyser,
                                     os.path.join(input_dir, jsonfile))
            futures[jsonfile] = future
        print(len(futures))
        for jsonfile, future in futures.items():
            try:
                result = future.result()
                if result is not None:
                    print("Writing result %s" % jsonfile)
                    result.to_csv(
                        os.path.join(output_dir,
                                     '%s.csv' % os.path.splitext(jsonfile)[0]))
            except Exception as exc:
                print('Failed to process file; exception: %s' % exc)
Example #4
    def _download_objects(self) -> None:
        """Download objects from the specified S3 bucket and path prefix."""
        if not self._quiet:
            print("Starting downloads...")

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self._threads
        ) as executor:
            futures = {}
            while True:
                item = self._object_queue.get(block=True)
                if item is None:  # Check for sentinel value
                    break
                futures[item] = executor.submit(self._download_object, item)

            for key, future in futures.items():
                try:
                    future.result()
                    self._successful_downloads += 1
                except KeyboardInterrupt:
                    if not self._quiet:
                        print("\nThreads are exiting...")
                    executor.shutdown(wait=False)
                    self._keyboard_interrupt_exit.set()
                    raise
                except Exception as e:
                    self._failed_downloads.append((key, e))
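The same drain-the-queue-until-a-sentinel shape, reduced to a runnable sketch (download_one and the queue contents are stand-ins for the S3 download logic):

import concurrent.futures
import queue

def download_one(key):
    if key == "bad":
        raise RuntimeError("simulated failure")
    return key

object_queue = queue.Queue()
for key in ("a", "b", "bad", None):  # None is the sentinel
    object_queue.put(key)

successes, failures = 0, []
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {}
    while True:
        item = object_queue.get(block=True)
        if item is None:  # sentinel: stop submitting
            break
        futures[item] = executor.submit(download_one, item)

    for key, future in futures.items():
        try:
            future.result()
            successes += 1
        except Exception as exc:
            failures.append((key, exc))

print(successes, failures)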
Example #5
def main(args):
    nltk.download('names')
    input_dir = args.input
    output_dir = args.output
    cast_data = args.cast_data
    if not os.path.exists(os.path.join(output_dir, 'adjacency')):
        os.makedirs(os.path.join(output_dir, 'adjacency'))
    if not os.path.exists(os.path.join(output_dir, 'bechdel')):
        os.makedirs(os.path.join(output_dir, 'bechdel'))
    if os.path.isfile(input_dir):
        analyze_scene(input_dir, cast_data, output_dir)
        return
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        futures = {}
        for jsonfile in [
                x for x in os.listdir(input_dir) if x.endswith('.json')
        ]:
            print('Analyzing %s' % jsonfile)
            future = executor.submit(analyze_scene,
                                     os.path.join(input_dir, jsonfile),
                                     cast_data, output_dir)
            futures[jsonfile] = future
        print(len(futures))
        for jsonfile, future in futures.items():
            try:
                result = future.result()
                # result.to_csv(os.path.join(output_dir, '%s.csv' % os.path.splitext(jsonfile)[0]))
            except Exception as exc:
                print('Failed to process file %s; exception: %s' %
                      (jsonfile, exc))
Example #6
def _remove_completed_rpcs(futures: Mapping[int, grpc.Future],
                           print_response: bool) -> None:
    logger.debug("Removing completed RPCs")
    done = []
    for future_id, future in futures.items():
        if future.done():
            _on_rpc_done(future_id, future, print_response)
            done.append(future_id)
    for rpc_id in done:
        del futures[rpc_id]
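Because a dict cannot be mutated while it is being iterated, the function collects the finished ids first and deletes them afterwards. The same idea with concurrent.futures (grpc.Future exposes a compatible done() method):

import concurrent.futures
import time

def remove_completed(futures):
    done = [key for key, f in futures.items() if f.done()]
    for key in done:
        del futures[key]

with concurrent.futures.ThreadPoolExecutor() as pool:
    futures = {i: pool.submit(time.sleep, i * 0.1) for i in range(3)}
    time.sleep(0.15)
    remove_completed(futures)
    print("still pending:", list(futures))  # typically [2]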
Example #7
async def run_rectangles_circum(exe, output_dir: str):
    futures = dict()
    loop = asyncio.get_running_loop()
    for n in range(1, CIRCUM_MAX + 1):
        rec = rectangles_circum(n)
        f = loop.run_in_executor(exe, rpack.pack, rec)
        futures[f] = rec
    output_file = os.path.join(output_dir, f'circum{CIRCUM_MAX}.pickle')
    with open(output_file, 'wb') as out_f:
        for f, rec in futures.items():
            pos = await f
            pickle.dump((rec, pos), out_f)
    print('Done:', output_file)
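Mapping each executor future back to its input lets the results be written next to the arguments that produced them, in submission order. A small sketch of loop.run_in_executor with the same futures-to-input dict (sum stands in for rpack.pack):

import asyncio
import concurrent.futures

async def main():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as exe:
        futures = {}
        for n in range(1, 4):
            rec = list(range(n))
            futures[loop.run_in_executor(exe, sum, rec)] = rec
        for fut, rec in futures.items():
            print(rec, "->", await fut)

if __name__ == "__main__":
    asyncio.run(main())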
Example #8
def extract_everything():
    """Extract everything in components.yml, respecting order requirements."""
    def q_key(comp):
        """Decide extract priority by pointer-chase depth, filesize in ties."""
        after = {c.install_after: c.name for c in component.ALL.values()}
        name, seen = comp.name, []
        while name in after:
            seen.append(name)
            name = after.get(name)
            if name in seen:
                raise ValueError('Cyclic "install_after" config detected: ' +
                                 " -> ".join(seen + [name]))
        return len(seen), os.path.getsize(comp.path)

    queue = list(component.ALL.values()) + [
        component.ALL["Dwarf Fortress"]._replace(name=path, extract_to=path)
        for path in ("curr_baseline", "graphics/ASCII")
    ]
    queue.sort(key=q_key, reverse=True)
    with concurrent.futures.ProcessPoolExecutor(8) as pool:
        futures = {}
        while queue:
            while sum(f.running() for f in futures.values()) < 8:
                for idx, comp in enumerate(queue):
                    if comp.extract_to is False:
                        assert comp.filename.endswith(".ini")
                        queue.pop(idx)
                        continue  # for Therapist, handled in build.py
                    aft = futures.get(comp.install_after)
                    # Even if it's highest-priority, wait for parent job(s)
                    if aft is None or aft.done():
                        futures[comp.name] = extract_comp(pool, queue.pop(idx))
                        break  # reset index or we might pop the wrong item
                else:
                    break  # if there was nothing eligible to extract, sleep
            time.sleep(0.01)
    failed = [k for k, v in futures.items() if v.exception() is not None]
    for key in failed:
        comp = component.ALL.pop(key, None)
        for lst in (component.FILES, component.GRAPHICS, component.UTILITIES):
            if comp in lst:
                lst.remove(comp)
    if failed:
        print("ERROR:  Could not extract: " + ", ".join(failed))
Example #9
async def run_rectangles_random(exe, output_dir: str, rec_func):
    """Build arguments of multiprocess task"""
    futures = dict()
    loop = asyncio.get_running_loop()
    progress = dict()
    for n in range(N_STEP, N_MAX + 1, N_STEP):
        for m in range(M_STEP, M_MAX + 1, M_STEP):
            prefix = rec_func.__name__.replace('rectangles_', '')
            name = f'{prefix}{n:03}n{m:04}m.pickle'
            output_file = os.path.join(output_dir, name)
            if m == M_MAX:
                s = no_samples(n)
            elif n == N_MAX:
                s = 100
            else:
                s = 10
            progress[output_file] = s
            for _ in range(s):
                rec = rec_func(n, m)
                f = loop.run_in_executor(exe, task, rec)
                futures[f] = output_file, rec

    files = dict()
    try:
        for f, (output_file, rec) in futures.items():
            pos, dt = await f
            if output_file not in files:
                files[output_file] = open(output_file, 'wb')
            out_f = files[output_file]
            pickle.dump((rec, pos, dt), out_f)
            progress[output_file] -= 1
            if progress[output_file] == 0:
                del progress[output_file]
                print('Done:', output_file, end='. ')
                print('Files remaining:', len(progress))
    finally:
        for f in files.values():
            f.close()
Example #10
def load_places():
    # create a csv writer. The headings will be written in the order shown
    writer = csv.DictWriter(open('data/places.csv', 'w'),
                            ['name', 'tel', 'street', 'locality', 'region',
                             'postcode'])
    with open('data/hospital-overall-ratings.csv', 'r') as f:
        reader = csv.DictReader(f)
        # go through each row and extract the name
        names = [row['Org Name'] for row in reader]
        # create a thread pool so that we can run the requests concurrently
        extension = '/Service-Search/Disambiguation/Organisation?serviceName='
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            # for every place create a future which will hold the result of
            # calling search for that place
            futures = {name: executor.submit(search, BASE + extension + name)
                       for name in names}
            for name, future in futures.items():
                res = future.result()
                if res is not None:
                    # search will return all the values needed apart from the
                    # name of the place. Add this in and write the row
                    res['name'] = name
                    writer.writerow(res)
Example #11
def main():
    # Parse command line arguments.
    parser = argparse.ArgumentParser(
        description="Check source code for formatting and linter problems.")
    parser.add_argument(
        "-a",
        "--no-cache",
        action="store_true",
        help="ignore the cache and check all files",
    )
    parser.add_argument(
        "-m",
        "--allow-modifications",
        action="store_true",
        help="allow the tools to modify files",
    )
    parser.add_argument(
        "-j",
        "--num_workers",
        type=int,
        default=min(16,
                    os.cpu_count() + 2),
        help="number of parallel workers",
    )
    parser.add_argument(
        "--progressbar-wait",
        metavar="SECONDS",
        type=int,
        default=1,
        help="number seconds in between progressbar updates",
    )
    parser.add_argument("files",
                        metavar="FILE",
                        nargs="*",
                        help="files to process")
    args = parser.parse_args()

    # Say hello to the server.
    print(
        f"Running precommit checks using {args.num_workers} workers and server: {SERVER}"
    )
    server_hello = urlopen(Request(SERVER + "/"),
                           timeout=10).read().decode("utf8")
    assert server_hello.startswith("cp2k precommit server")

    # Store candidate before changing base directory and creating scratch dir.
    file_list = [os.path.abspath(fn) for fn in args.files]
    base_dir = Path(__file__).resolve().parent.parent.parent
    os.chdir(base_dir)
    SCRATCH_DIR.mkdir(parents=True, exist_ok=True)

    # Collect candidate files.
    if not file_list:
        sys.stdout.write("Searching for files...\r")
        sys.stdout.flush()
        for root, dirs, files in os.walk("."):
            if root.startswith("./tools/toolchain/build"):
                continue
            if root.startswith("./tools/toolchain/install"):
                continue
            if root.startswith("./tools/prettify/fprettify"):
                continue
            if root.startswith("./tools/build_utils/fypp"):
                continue
            if root.startswith("./tools/autotools"):
                continue
            if root.startswith("./tools/minimax_tools/1_xData"):
                continue
            if root.startswith("./data/DFTB/scc"):
                continue
            if root.startswith("./arch"):
                continue
            if root.startswith("./doxygen"):
                continue
            if root.startswith("./exts"):
                continue
            if root.startswith("./obj"):
                continue
            if root.startswith("./lib"):
                continue
            if root.startswith("./exe"):
                continue
            if root.startswith("./regtesting"):
                continue
            if root.startswith("./.git"):
                continue
            if "/.mypy_cache/" in root:
                continue
            file_list += [os.path.join(root, fn) for fn in files]

    # Filter symlinks, backup copies, and hidden files.
    file_list = [fn for fn in file_list if not os.path.islink(fn)]
    file_list = [fn for fn in file_list if not fn[-1] in ("~", "#")]
    file_list = [
        fn for fn in file_list if not os.path.basename(fn).startswith(".")
    ]

    # Sort files by size as larger ones will take longer to process.
    file_list.sort(reverse=True, key=lambda fn: os.path.getsize(fn))

    # Load cache.
    should_load_cache = CACHE_FILE.exists() and not args.no_cache
    cache = json.loads(CACHE_FILE.read_text()) if should_load_cache else {}

    # Launch async processing of files.
    futures = {}
    executor = concurrent.futures.ThreadPoolExecutor(
        max_workers=args.num_workers)
    for fn in file_list:
        if os.path.getmtime(fn) != cache.get(fn, -1):
            futures[fn] = executor.submit(process_file, fn,
                                          args.allow_modifications)
    num_skipped = len(file_list) - len(futures)

    # Continuously update progressbar, save cache file, and print errors.
    failed_files = set()
    while True:
        num_done = num_skipped
        for fn, f in futures.items():
            if f.done():
                num_done += 1
                if not f.exception():
                    cache[fn] = os.path.getmtime(fn)
                elif fn not in failed_files:
                    failed_files.add(fn)
                    print_box(fn, str(f.exception()))
        CACHE_FILE.write_text(json.dumps(cache))
        progressbar = "=" * int(60 * num_done / len(file_list))
        sys.stdout.write(
            f"[{progressbar:60s}] {num_done} / {len(file_list)} files processed\r"
        )
        sys.stdout.flush()
        if num_done == len(file_list) or len(failed_files) >= 10:
            executor.shutdown(wait=False)
            break
        sleep(args.progressbar_wait)

    # Print final message.
    print(f"Summary: Found {len(file_list)}, "
          f"skipped {num_skipped}, "
          f"checked {num_done - num_skipped}, "
          f"and failed {len(failed_files)} files." + (" " * 50))
    print("Status: " + ("FAILED" if failed_files else "OK"))
    sys.exit(len(failed_files))
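Instead of blocking on result(), the loop above repeatedly polls done() and exception() so it can refresh the progress bar and save the cache between passes. A stripped-down version of that polling loop (process_item is a hypothetical stand-in for process_file):

import concurrent.futures
import time

def process_item(n):
    time.sleep(0.1 * n)
    if n == 3:
        raise ValueError("simulated failure")
    return n

executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
futures = {n: executor.submit(process_item, n) for n in range(5)}
failed = set()
while True:
    num_done = 0
    for n, f in futures.items():
        if f.done():
            num_done += 1
            if f.exception() and n not in failed:
                failed.add(n)
                print("item", n, "failed:", f.exception())
    print(f"{num_done}/{len(futures)} items processed")
    if num_done == len(futures):
        executor.shutdown(wait=False)
        break
    time.sleep(0.2)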
Example #12
    def construct_kernel_matrix(self,
                                x1_vec,
                                x2_vec=None,
                                quantum_instance=None):
        """
        Construct the kernel matrix; if x2_vec is None, the self inner product is computed.

        Args:
            x1_vec (numpy.ndarray): data points, 2-D array, N1xD, where N1 is the number of data,
                                    D is the feature dimension
            x2_vec (numpy.ndarray): data points, 2-D array, N2xD, where N2 is the number of data,
                                    D is the feature dimension
            quantum_instance (QuantumInstance): quantum backend with all setting
        Returns:
            numpy.ndarray: 2-D matrix, N1xN2
        """
        self._quantum_instance = self._quantum_instance \
            if quantum_instance is None else quantum_instance
        from .qsvm_kernel import QSVMKernel

        if x2_vec is None:
            is_symmetric = True
            x2_vec = x1_vec
        else:
            is_symmetric = False

        is_statevector_sim = self.quantum_instance.is_statevector
        measurement = not is_statevector_sim
        measurement_basis = '0' * self.num_qubits
        mat = np.ones((x1_vec.shape[0], x2_vec.shape[0]))
        num_processes = psutil.cpu_count(
            logical=False) if platform.system() != "Windows" else 1

        # get all to-be-computed indices
        if is_symmetric:
            mus, nus = np.triu_indices(x1_vec.shape[0],
                                       k=1)  # remove diagonal term
        else:
            mus, nus = np.indices((x1_vec.shape[0], x2_vec.shape[0]))
            mus = np.asarray(mus.flat)
            nus = np.asarray(nus.flat)

        for idx in range(0, len(mus), QSVMKernel.BATCH_SIZE):
            circuits = {}
            to_be_simulated_circuits = []
            with concurrent.futures.ProcessPoolExecutor(
                    max_workers=num_processes) as executor:
                futures = {}
                for sub_idx in range(
                        idx, min(idx + QSVMKernel.BATCH_SIZE, len(mus))):
                    i = mus[sub_idx]
                    j = nus[sub_idx]
                    x1 = x1_vec[i]
                    x2 = x2_vec[j]
                    if not np.all(x1 == x2):
                        futures["{}:{}".format(i, j)] = \
                            executor.submit(QSVMKernel._construct_circuit,
                                            x1, x2, self.num_qubits, self.feature_map,
                                            measurement, "circuit{}:{}".format(i, j))

                for k, v in futures.items():
                    circuit = v.result()
                    circuits[k] = circuit
                    to_be_simulated_circuits.append(circuit)

            results = self.quantum_instance.execute(to_be_simulated_circuits)
            kernel_values = {}

            with concurrent.futures.ProcessPoolExecutor(
                    max_workers=num_processes) as executor:
                for idx, circuit in circuits.items():
                    kernel_values[idx] = executor.submit(
                        QSVMKernel._compute_overlap, results, circuit,
                        is_statevector_sim, measurement_basis)
                for k, v in kernel_values.items():
                    i, j = [int(x) for x in k.split(":")]
                    mat[i, j] = v.result()
                    if is_symmetric:
                        mat[j, i] = mat[i, j]
        return mat
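The snippet keys its futures by an "i:j" string so that after result() the matrix indices can be recovered with split(":"). A minimal sketch of that indexing trick (fake_overlap stands in for the real kernel computation, a thread pool keeps it self-contained, and numpy is assumed to be available):

import concurrent.futures
import numpy as np

def fake_overlap(i, j):
    return 1.0 / (1 + abs(i - j))

mat = np.ones((3, 3))
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = {}
    for i, j in zip(*np.triu_indices(3, k=1)):  # upper triangle, no diagonal
        futures["{}:{}".format(i, j)] = executor.submit(fake_overlap, int(i), int(j))
    for key, future in futures.items():
        i, j = [int(x) for x in key.split(":")]
        mat[i, j] = mat[j, i] = future.result()
print(mat)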
Example #13
    def execute_together(
        cls,
        remotes,  # type: typing.Iterable[SSHClientBase]
        command,  # type: str
        timeout=constants.DEFAULT_TIMEOUT,  # type: typing.Union[int, None]
        expected=None,  # type: typing.Optional[typing.Iterable[int]]
        raise_on_err=True,  # type: bool
        **kwargs
    ):  # type: (...) -> typing.Dict[typing.Tuple[str, int], exec_result.ExecResult]
        """Execute command on multiple remotes in async mode.

        :param remotes: Connections to execute on
        :type remotes: typing.Iterable[SSHClientBase]
        :param command: Command for execution
        :type command: str
        :param timeout: Timeout for command execution.
        :type timeout: typing.Union[int, None]
        :param expected: expected return codes (0 by default)
        :type expected: typing.Optional[typing.Iterable[int]]
        :param raise_on_err: Raise exception on unexpected return code
        :type raise_on_err: bool
        :return: dictionary {(hostname, port): result}
        :rtype: typing.Dict[typing.Tuple[str, int], exec_result.ExecResult]
        :raises ParallelCallProcessError: Unexpected exit code on at least one target
        :raises ParallelCallExceptions: At least one exception raised during execution (including timeout)

        .. versionchanged:: 1.2.0 default timeout 1 hour
        .. versionchanged:: 1.2.0 log_mask_re regex rule for masking cmd
        """
        @threaded.threadpooled
        def get_result():  # type: () -> exec_result.ExecResult
            """Get result from remote call."""
            (
                chan,
                _,
                stderr,
                stdout,
            ) = remote.execute_async(command,
                                     **kwargs)  # type: _type_execute_async

            chan.status_event.wait(timeout)
            exit_code = chan.recv_exit_status()

            # pylint: disable=protected-access
            cmd_for_log = remote._mask_command(cmd=command,
                                               log_mask_re=kwargs.get(
                                                   'log_mask_re', None))
            # pylint: enable=protected-access

            result = exec_result.ExecResult(cmd=cmd_for_log)
            result.read_stdout(src=stdout)
            result.read_stderr(src=stderr)
            result.exit_code = exit_code

            chan.close()
            return result

        expected = expected or [proc_enums.ExitCodes.EX_OK]
        expected = proc_enums.exit_codes_to_enums(expected)

        futures = {}
        results = {}
        errors = {}
        raised_exceptions = {}

        for remote in set(remotes):  # Use distinct remotes
            futures[remote] = get_result()

        (
            _,
            not_done,
        ) = concurrent.futures.wait(
            list(futures.values()), timeout=timeout
        )  # type: typing.Set[concurrent.futures.Future], typing.Set[concurrent.futures.Future]

        for future in not_done:  # pragma: no cover
            future.cancel()

        for (
                remote,
                future,
        ) in futures.items():  # type: SSHClientBase, concurrent.futures.Future
            try:
                result = future.result()
                results[(remote.hostname, remote.port)] = result
                if result.exit_code not in expected:
                    errors[(remote.hostname, remote.port)] = result
            except Exception as e:
                raised_exceptions[(remote.hostname, remote.port)] = e

        if raised_exceptions:  # always raise
            raise exceptions.ParallelCallExceptions(command,
                                                    raised_exceptions,
                                                    errors,
                                                    results,
                                                    expected=expected)
        if errors and raise_on_err:
            raise exceptions.ParallelCallProcessError(command,
                                                      errors,
                                                      results,
                                                      expected=expected)
        return results
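The overall shape is: start one threaded call per remote, wait() on all of them with a single timeout, cancel whatever did not finish, then walk futures.items() to sort results from errors per host. A reduced sketch of that flow (run_on and the host names are made up):

import concurrent.futures
import time

def run_on(host, delay):
    time.sleep(delay)
    return "ok from {}".format(host)

executor = concurrent.futures.ThreadPoolExecutor()
futures = {host: executor.submit(run_on, host, delay)
           for host, delay in {"fast-host": 0.1, "slow-host": 2.0}.items()}

done, not_done = concurrent.futures.wait(list(futures.values()), timeout=0.5)
for future in not_done:
    future.cancel()  # has no effect on calls that are already running

results, errors = {}, {}
for host, future in futures.items():
    try:
        results[host] = future.result(timeout=0)
    except Exception as exc:
        errors[host] = exc
print(results)
print(errors)
executor.shutdown(wait=False)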
Example #14
                                           or opt_dst_location == 'dr')
    GTMs = Pri_GTM if if_has_Pri else Pub_GTM
    if if_has_Pri:
        break
logger.info('{}'.format(GTMs))
device_topo_info_dict = Vividict()
topo_record = {}
with concurrent.futures.ThreadPoolExecutor() as pool:
    t = threading.currentThread()
    logger.info('{} {}'.format(t.getName(), t.ident))
    futures = {}
    uri_prefix = '/mgmt/tm/gtm/topology/'
    for gtm_ip in GTMs:
        future = pool.submit(do_get_gtm_topo, gtm_ip)
        futures[future] = gtm_ip
    for future, gtm_ip in futures.items():
        ldns_info, dst_info, score_info = do_collate_gtm_topo(
            future.result())
        device_topo_info_dict[gtm_ip]['ldns'] = ldns_info
        device_topo_info_dict[gtm_ip]['destination'] = dst_info
        device_topo_info_dict[gtm_ip]['weight'] = score_info
    logger.info(device_topo_info_dict)
    region_ip_list = do_transfrom_ip_to_region(src_ip_list)
    random_str = ''.join(
        random.sample(string.ascii_letters + string.digits, 5))
    data_json, src_region_name = do_generate_gtm_region_json(
        region_ip_list, name_prefix="SrcRegion_{}".format(random_str))
    topo_record['SrcName'] = src_region_name
    uri_prefix = '/mgmt/tm/gtm/region'
    topo_futures = []
    for gtm_ip in GTMs:
Example #15
def evaluate_filter(filter_num):
    try:
        urlwatch_output = subprocess.check_output(
            [
                'urlwatch', '--urls', args.urls_yaml, '--test-filter',
                '{}'.format(filter_num)
            ],
            stderr=subprocess.STDOUT).decode()
    except subprocess.CalledProcessError as e:
        urlwatch_output = e.output.decode()

    return urlwatch_output


with open(args.urls_yaml, 'r') as yaml_file:
    urls_list = yaml.load_all(yaml_file, Loader=yaml.FullLoader)

    futures = {}
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=args.parallelism) as executor:
        for i, url in enumerate(urls_list):
            # filters are 1-indexed
            futures[url['name']] = executor.submit(evaluate_filter, i + 1)

    for name, future in futures.items():
        urlwatch_output = str(future.result())
        if args.output_dir:
            with open(os.path.join(args.output_dir, name), 'w') as output_file:
                print(name)
                output_file.write(urlwatch_output)
        else:
            print(f"{name}:\n{urlwatch_output}")