Example #1
def file_as_training_vectors(filename, gram_size):
    worker_count = 8
    with ProcessPoolExecutor(max_workers=worker_count) as exe:
        itr = enumerate(file_as_posts(filename))
        futures = []
        for i in range(worker_count):
            try:
                idx, post = next(itr)
                futures.append(exe.submit(post_to_vector, post, gram_size))
            except StopIteration:
                break  # we've submitted all the tasks already!

        while True:
            done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
            futures = []
            for i in done:
                yield from i.result()
                try:
                    idx, post = next(itr)
                    futures.append(exe.submit(post_to_vector, post, gram_size))
                except StopIteration:
                    # just wait it out
                    pass

            futures.extend(not_done)
            if len(futures) == 0:
                return
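A minimal standalone sketch of the refill pattern used above (submit up to a fixed number of tasks, then replace each one as it completes); bounded_map, work, and items are placeholder names, and work plus its arguments are assumed to be picklable:

import concurrent.futures
import itertools

def bounded_map(work, items, max_in_flight=8):
    # Keep at most max_in_flight tasks submitted at any time and yield
    # results as they complete.
    itr = iter(items)
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_in_flight) as exe:
        pending = {exe.submit(work, item)
                   for item in itertools.islice(itr, max_in_flight)}
        while pending:
            done, pending = concurrent.futures.wait(
                pending, return_when=concurrent.futures.FIRST_COMPLETED)
            for fut in done:
                yield fut.result()
                try:
                    pending.add(exe.submit(work, next(itr)))
                except StopIteration:
                    pass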
Example #2
    def check_open_orders(self):
        """
        Loop through the list of open orders in the Portfolio object.
        For each executed order found, create a transaction and apply to the
        Portfolio.

        Returns
        -------
        list[Transaction]

        """
        futures = []

        for asset in self.open_orders:
            exchange = self.exchanges[asset.exchange]

            futures.extend([
                self.event_loop.run_in_executor(
                    self.executor, self.get_transactions_for_order,
                    asset.symbol, exchange, order)
                for order in self.open_orders[asset]
            ])

        async_results = asyncio.gather(*futures, loop=self.event_loop)
        return [
            txn for txn in self.event_loop.run_until_complete(async_results)
            if txn is not None
        ]
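The loop argument passed to asyncio.gather above was deprecated in Python 3.8 and removed in 3.10; on newer interpreters the same result can be gathered from inside the loop instead. A sketch of that variant, reusing the futures list built above:

        async def _gather():
            return await asyncio.gather(*futures)

        return [
            txn for txn in self.event_loop.run_until_complete(_gather())
            if txn is not None
        ]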
Example #3
async def execute_async(start_time):
    loop = asyncio.get_event_loop()
    import concurrent.futures
    import os
    print(os.cpu_count())
    executor = concurrent.futures.ThreadPoolExecutor()
    print(executor._max_workers)
    print()
    futures = [
        loop.run_in_executor(
            None,
            http_request,
            start_time,
            'http://example.org/'
        )
        for i in range(3)
    ]

    futures.extend([loop.run_in_executor(
        None,
        execute_sql,
        start_time,
        "select * from users where name like :string ;",
        {'string': "%ck"}
    ) for i in range(3)])

    await asyncio.gather(*futures)
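The ThreadPoolExecutor created above is only used to print its worker count; the tasks themselves run on the loop's default executor because None is passed. If the intent is to run them on the explicit executor, it can be passed as the first argument to run_in_executor, as in this sketch:

    futures = [
        loop.run_in_executor(executor, http_request, start_time, 'http://example.org/')
        for i in range(3)
    ]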
Example #4
def main():
    args = parse_args()
    logger = logging.getLogger(config.LOGGER_NAME)

    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    logger.debug("VERBOSE MODE ON")

    if args.output is not None:
        logger.debug(f"Saving log to: {args.output}")
        logger.addHandler(logging.FileHandler(args.output, encoding="utf-8", mode="w"))

    today = datetime.utcnow()
    start_date = (today - timedelta(days=args.max_days - 1)).date()
    end_date = today.date()

    logger.debug(f"[*] Query: {args.query}")
    logger.debug(f"[*] Date range: {start_date}~{end_date}")
    logger.debug(f"[*] Crawling targets: {', '.join(args.targets)}")

    logger.debug("[*] Running crawlers...")

    pool = concurrent.futures.ThreadPoolExecutor()
    futures = []
    for q in args.query:
        crawling_targets = [target2crawler[target.lower()]() for target in args.targets]
        futures.extend(
            [
                pool.submit(
                    # bind c as a default argument so every task runs its own
                    # crawler; a bare closure would see only the last value of c
                    lambda query, start_date, end_date, analyse, main_columns_only, c=c: c.run(
                        query=query,
                        start_date=start_date,
                        end_date=end_date,
                        analyse=analyse,
                        main_columns_only=main_columns_only,
                    ),
                    q,
                    start_date,
                    end_date,
                    args.analyse,
                    not args.all_columns,
                )
                for c in crawling_targets
            ]
        )

    try:
        for completed in concurrent.futures.as_completed(futures):
            print(f"[+] Done - {completed.result()}")
    except KeyboardInterrupt:
        # https://gist.github.com/clchiou/f2608cbe54403edb0b13
        pool._threads.clear()
        concurrent.futures.thread._threads_queues.clear()
        raise

    pool.shutdown()
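Since Executor.submit forwards keyword arguments to the callable, the lambda in the example above can be dropped entirely; a sketch of the same submission loop:

    for q in args.query:
        crawling_targets = [target2crawler[target.lower()]() for target in args.targets]
        futures.extend(
            pool.submit(
                c.run,
                query=q,
                start_date=start_date,
                end_date=end_date,
                analyse=args.analyse,
                main_columns_only=not args.all_columns,
            )
            for c in crawling_targets
        )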
Example #5
    def getnewdata(self):
        """Get latest data from TaDo server and add to dataframe.

        Uses multithreaded pool to get data simultaneously.
        """
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=len(self.zones) + 1) as executor:
            # Load the operations then wait for them all to complete
            futures = [executor.submit(self.getweatherdata)]
            futures.extend([
                executor.submit(self.getzonedata, zone) for zone in self.zones
            ])
            futures, _ = concurrent.futures.wait(futures, timeout=30)
            return [future.result() for future in futures]
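concurrent.futures.wait returns a (done, not_done) pair, so any zone request still running after the 30-second timeout above is silently dropped. A sketch that handles the pending set explicitly, using the same executor and futures as the method above:

            done, not_done = concurrent.futures.wait(futures, timeout=30)
            for future in not_done:
                future.cancel()  # best effort; calls that already started keep running
            return [future.result() for future in done]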
Example #6
    async def fuzz_domains(self, domains):
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.threads) as executor:
            _loop = asyncio.get_event_loop()

            futures = []
            for domain in domains:
                ext = tldextract.extract(domain)
                if ext.subdomain:
                    level = ext.subdomain.count('.') + 1
                else:
                    level = 0
                if level > int(self.max_level):
                    _debug_('{} level is more than max_level({})'
                            .format(domain, self.max_level))
                    continue

                if level == 0:
                    futures.extend([
                        _loop.run_in_executor(
                            executor,
                            self.query_domain_arecord,
                            '.'.join([sub, domain]))
                        for sub in self.subs
                    ])
                else:
                    futures.extend([
                        _loop.run_in_executor(
                            executor,
                            self.query_domain_arecord,
                            '.'.join([nxt, domain]))
                        for nxt in self.nexts
                    ])
            sub_results = await asyncio.gather(*futures)
            sub_results = list(filter(lambda _: len(_[1]) != 0, sub_results))

            sub_domains = dict(sub_results).keys()

            futures = [
                _loop.run_in_executor(
                    executor,
                    self.analysis_wirdcard_domain,
                    sub_domain)
                for sub_domain in sub_domains
            ]
            sub_wd_results = await asyncio.gather(*futures)
            sub_wd_results = list(filter(lambda _: not _[1], sub_wd_results))

            return dict(sub_results), dict(sub_wd_results)
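With the default return_exceptions=False, a single lookup that raises makes the whole asyncio.gather call above raise. If partial results are preferable, the return_exceptions flag collects exceptions alongside results; a sketch for the first gather:

            results = await asyncio.gather(*futures, return_exceptions=True)
            sub_results = [
                r for r in results
                if not isinstance(r, BaseException) and len(r[1]) != 0
            ]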
Example #7
    return futures


def do_work(dummy_file, work_input):
    print("{}: {}".format(dummy_file, work_input))
    print("{}: Doing work {}...".format(dummy_file, work_input))
    print('process id:', os.getpid())
    time.sleep(1)

    return work_input * work_input


if __name__ == '__main__':
    find_work_pool = ThreadPoolExecutor(max_workers=3)
    #    do_work_pool=[]
    #    for ele in range(0,3):
    #        do_work_pool.append(ProcessPoolExecutor(max_workers=2))

    dummy_files = [1, 2, 3]

    futures = []
    for dummy_file in dummy_files:
        futures.extend(
            find_work_pool.submit(find_work_inputs, dummy_file).result())

#    concurrent.futures.wait(futures)

    for dummy_file, work_input, future in futures:
        print("Result from file:{} input:{} is {}".format(
            dummy_file, work_input, sum(future)))
Example #8
    celery.cache_policy_requests()
    celery.cache_credential_authorization_mapping.apply_async(countdown=180)

else:
    celery.cache_cloud_account_mapping()
    accounts_d = async_to_sync(get_account_id_to_name_mapping)(force_sync=True)
    default_celery_tasks.cache_application_information()
    executor = ThreadPoolExecutor(max_workers=os.cpu_count())
    futures = []
    for account_id in accounts_d.keys():
        futures.extend(
            [
                executor.submit(celery.cache_iam_resources_for_account, account_id),
                executor.submit(celery.cache_s3_buckets_for_account, account_id),
                executor.submit(celery.cache_sns_topics_for_account, account_id),
                executor.submit(celery.cache_sqs_queues_for_account, account_id),
                executor.submit(celery.cache_managed_policies_for_account, account_id),
                executor.submit(
                    celery.cache_resources_from_aws_config_for_account, account_id
                ),
            ]
        )
    for future in concurrent.futures.as_completed(futures):
        try:
            data = future.result()
        except Exception as exc:
            print("%r generated an exception: %s" % (future, exc))

    celery.cache_policies_table_details()
    celery.cache_policy_requests()
    celery.cache_credential_authorization_mapping()
    # Forces writing config to S3
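The bare ThreadPoolExecutor in this fragment is never shut down explicitly; a sketch of the same per-account fan-out wrapped in a context manager so the worker threads are joined when the block exits (same celery task names as above):

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = [
            executor.submit(task, account_id)
            for account_id in accounts_d
            for task in (
                celery.cache_iam_resources_for_account,
                celery.cache_s3_buckets_for_account,
                celery.cache_sns_topics_for_account,
                celery.cache_sqs_queues_for_account,
                celery.cache_managed_policies_for_account,
                celery.cache_resources_from_aws_config_for_account,
            )
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                print("%r generated an exception: %s" % (future, exc))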
Example #9
def main(time_limit, make_releases, main_only, debug, allow_unsafe):
    """Worker process for continuously building repodata for a maximum
    number of TIME_LIMIT seconds.
    """
    global DEBUG
    DEBUG = debug

    start_time = time.time()

    # refresh at the start
    refresh_github_token_and_client()

    with timer(HEAD, "pulling repos"):
        os.makedirs(WORKDIR, exist_ok=True)
        if not os.path.exists("repodata-shards"):
            _clone_repodata_shards()
        if not os.path.exists(REPODATA_NAME):
            _clone_repodata()
        if not os.path.exists("conda-forge-repodata-patches-feedstock"):
            _clone_and_init_repodata_patches()

    with timer(HEAD, "loading local data"):
        all_repodata, all_links = _load_current_data(make_releases, allow_unsafe)
        all_channeldata = {}
        all_patched_repodata = {}

    while time.time() - start_time < time_limit:
        __dt = time.time() - start_time
        print("===================================================", flush=True)
        print("===================================================", flush=True)
        print(
            "used %ds of %ds total - %ds remaining" % (
                __dt, time_limit, time_limit - __dt
            ),
            flush=True,
        )
        print("===================================================", flush=True)
        print("===================================================", flush=True)

        build_start_time = time.time()

        refresh_github_token_and_client()

        with timer(HEAD, "doing repodata products rebuild"), ThreadPoolExecutor(max_workers=8) as exec:  # noqa
            old_sha, new_sha, new_shards, removed_shards = _get_new_shards(
                all_links["current-shas"].get("repodata-shards-sha", None)
            )
            # TODO force repatch if local data is inconsistent
            old_patch_sha, new_patch_sha, patch_fns = _update_and_reimport_patch_fns(
                all_links["current-shas"].get("repodata-patches-sha", None)
            )
            repatch_all_pkgs = old_patch_sha != new_patch_sha
            utcnow = datetime.now().astimezone(pytz.UTC)

            updated_data = set()
            if (
                make_releases
                and
                # None is a full rebuild, otherwise len > 0 means we have new ones
                # to add
                # we have to make a release if we need to repatch everything as well
                (
                    new_shards is None
                    or len(new_shards) > 0
                    or removed_shards is None
                    or len(removed_shards) > 0
                    or repatch_all_pkgs
                )
            ):
                tag = utcnow.strftime("%Y.%m.%d.%H.%M.%S")
                rel = get_repodata().create_git_tag_and_release(
                    tag,
                    "",
                    tag,
                    "",
                    _get_repodata_sha(),
                    "commit",
                    draft=True,
                )
                futures = []
            else:
                # do this to catch errors
                futures = None
                rel = None

            for subdir in CONDA_FORGE_SUBIDRS:
                try:
                    _rebuild_subdir(
                        subdir=subdir,
                        new_shards=new_shards,
                        removed_shards=removed_shards,
                        repatch_all_pkgs=repatch_all_pkgs,
                        all_repodata=all_repodata,
                        all_patched_repodata=all_patched_repodata,
                        all_links=all_links,
                        updated_data=updated_data,
                        make_releases=make_releases,
                        main_only=main_only,
                        patch_fns=patch_fns,
                        futures=futures,
                        rel=rel,
                        exec=exec,
                    )
                except Exception:
                    if rel is not None and futures is not None:
                        for fn in list(all_links["serverdata"]):
                            if f"_{subdir}" in fn:
                                del all_links["serverdata"][fn]

                        # rebuild it all if we error
                        _rebuild_subdir(
                            subdir=subdir,
                            new_shards=None,
                            removed_shards=None,
                            repatch_all_pkgs=True,
                            all_repodata=all_repodata,
                            all_patched_repodata=all_patched_repodata,
                            all_links=all_links,
                            updated_data=updated_data,
                            make_releases=make_releases,
                            main_only=main_only,
                            patch_fns=patch_fns,
                            futures=futures,
                            rel=rel,
                            exec=exec,
                        )

            all_links["current-shas"]["repodata-shards-sha"] = new_sha
            all_links["current-shas"]["repodata-patches-sha"] = new_patch_sha

            if updated_data and make_releases:
                with timer(HEAD, "(re)building channel data"):
                    futures.extend(_build_channel_data(
                        all_channeldata,
                        all_links,
                        all_patched_repodata,
                        all_links["labels"],
                        updated_data,
                        rel,
                        exec,
                        make_releases=make_releases,
                        main_only=main_only,
                    ))

            if updated_data and make_releases:
                with timer(HEAD, "waiting for repo/channel data uploads to finish"):
                    for fut in concurrent.futures.as_completed(futures):
                        fname, url = fut.result()
                        if fname not in all_links["serverdata"]:
                            all_links["serverdata"][fname] = []
                        all_links["serverdata"][fname].append(url)
                        if len(all_links["serverdata"][fname]) > 3:
                            all_links["serverdata"][fname] = \
                                all_links["serverdata"][fname][-3:]
                    futures = []

                with timer(HEAD, "writing and uploading links"):
                    all_links["updated_at"] = utcnow.strftime("%Y-%m-%d %H:%M:%S %Z%z")
                    futures.extend(
                        _write_compress_and_start_upload(
                            all_links,
                            "links.json",
                            rel,
                            exec,
                            only_compress=True,
                        )
                    )
                    concurrent.futures.wait(futures)

                with timer(HEAD, "publishing release", result=False):
                    rel.update_release(rel.title, rel.body, draft=False)

            if make_releases:
                with timer(HEAD, "deleting old releases"):
                    tags = delete_old_repodata_releases(all_links)
                    for tag in tags:
                        print(f"{HEAD}deleted release {tag}", flush=True)

        dt = int(time.time() - build_start_time)

        if dt < MIN_UPDATE_TIME:
            print(
                "REPO WORKER: waiting for %s seconds before "
                "next update" % (MIN_UPDATE_TIME - dt),
                flush=True,
            )
            time.sleep(MIN_UPDATE_TIME - dt)

    if DEBUG:
        with timer(HEAD, "dumping all data to JSON"):
            with open(f"{WORKDIR}/all_repodata.json", "w") as fp:
                json.dump(all_repodata, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_patched_repodata.json", "w") as fp:
                json.dump(all_patched_repodata, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_links.json", "w") as fp:
                json.dump(all_links, fp, indent=2, sort_keys=True)
            with open(f"{WORKDIR}/all_channeldata.json", "w") as fp:
                json.dump(all_channeldata, fp, indent=2, sort_keys=True)
Example #10
def _rebuild_subdir(
    *, subdir, new_shards, removed_shards, repatch_all_pkgs,
    all_repodata, all_patched_repodata, all_links, updated_data,
    make_releases, main_only, patch_fns, futures, rel, exec,
):
    if new_shards is not None:
        new_subdir_shards = [
            k
            for k in new_shards
            if k.startswith(f"repodata-shards/shards/{subdir}/")
        ]
    else:
        # this is a sentinel that indicates a full rebuild
        new_subdir_shards = None

    if removed_shards is not None:
        removed_subdir_shards = [
            k
            for k in removed_shards
            if k.startswith(f"repodata-shards/shards/{subdir}/")
        ]
    else:
        # this is a sentinel that indicates a full rebuild
        removed_subdir_shards = None

    if subdir not in all_repodata:
        all_repodata[subdir] = {}
    if subdir not in all_patched_repodata:
        all_patched_repodata[subdir] = {}

    subdir_updated_data = set()

    with timer(HEAD, "processing shards for subdir %s" % subdir):
        if (
            new_subdir_shards is None
            or len(new_subdir_shards) > 0
            or removed_subdir_shards is None
            or len(removed_subdir_shards) > 0
        ):
            with timer(HEAD, "making repodata", indent=1):
                subdir_updated_data = _update_repodata_from_shards(
                    all_repodata,
                    all_links,
                    new_subdir_shards,
                    removed_subdir_shards,
                    subdir,
                )
                updated_data |= subdir_updated_data
                all_labels = set(all_links["labels"])
                all_labels |= set(
                    [label for label in all_patched_repodata[subdir]])
                all_labels |= set(
                    [label for label in all_repodata[subdir]])
                all_links["labels"] = sorted(all_labels)

        if make_releases and (subdir_updated_data or repatch_all_pkgs):
            with timer(HEAD, "patching and writing repodata", indent=1):
                for label in all_links["labels"]:
                    if (
                        (subdir, label) not in updated_data
                        and not repatch_all_pkgs
                    ):
                        continue
                    if main_only and label != "main":
                        continue

                    if label not in all_repodata[subdir]:
                        all_repodata[subdir][label] = \
                            _fetch_repodata(all_links, subdir, label)

                    if label not in all_patched_repodata[subdir]:
                        all_patched_repodata[subdir][label] = \
                            _fetch_patched_repodata(
                                all_links, subdir, label
                            )

                    if label == "broken":
                        all_patched_repodata[subdir][label] = copy.deepcopy(
                            all_repodata[subdir][label]
                        )
                    else:
                        _patch_repodata(
                            all_repodata[subdir][label],
                            all_patched_repodata[subdir][label],
                            subdir,
                            patch_fns,
                            do_all=repatch_all_pkgs,
                        )

                    futures.extend(_write_compress_and_start_upload(
                        all_patched_repodata[subdir][label],
                        f"repodata_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))

            with timer(
                HEAD, "building and writing current repodata", indent=1
            ):
                for label in all_links["labels"]:
                    if (
                        (subdir, label) not in updated_data
                        and not repatch_all_pkgs
                    ):
                        continue
                    if main_only and label != "main":
                        continue

                    if label not in all_patched_repodata[subdir]:
                        with timer(
                            HEAD,
                            f"fetching patched repodata for "
                            f"{label}/{subdir}",
                            indent=3,
                        ):
                            all_patched_repodata[subdir][label] = \
                                _fetch_patched_repodata(
                                    all_links, subdir, label
                                )

                    crd = build_current_repodata(
                        subdir,
                        all_patched_repodata[subdir][label],
                    )

                    futures.extend(_write_compress_and_start_upload(
                        crd,
                        f"current_repodata_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))

            with timer(
                HEAD, "writing repodata from packages", indent=1
            ):
                for label in all_links["labels"]:
                    if (subdir, label) not in updated_data:
                        continue
                    if main_only and label != "main":
                        continue

                    futures.extend(_write_compress_and_start_upload(
                        all_repodata[subdir][label],
                        f"repodata_from_packages_{subdir}_{label}.json",
                        rel,
                        exec,
                    ))
Example #11
def main():
    args = parse_args()
    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    # Load list of profiles from file.
    #
    # If file does not exist or list is empty, the profile list is initialized
    # with the default profile directory path.
    df_profiles = load_profiles_info(args.profiles_path)
    drop_version_errors(df_profiles,
                        missing=False,
                        mismatch=True,
                        inplace=True)

    # Save most recent list of profiles to disk.
    with args.profiles_path.open('w') as output:
        profiles_str = yaml.dump(
            df_profiles[SAVED_COLUMNS].astype(str).to_dict('records'),
            default_flow_style=False)
        output.write(profiles_str)

    # Look up major version of each profile.
    df_profiles['major_version'] = df_profiles.path.map(profile_major_version)

    # Perform the following tasks in the background:
    #
    #  - Upgrade `microdrop-launcher` package
    #  - Cache latest `microdrop` package version
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:

        def _auto_upgrade():
            process = sp.Popen(
                [sys.executable, '-m', 'microdrop_launcher.auto_upgrade'])

            return process.communicate()

        def _cache_latest_microdrop_version():
            process = sp.Popen(
                [sys.executable, '-m', 'microdrop_launcher.microdrop_version'])

            return process.communicate()

        def _launch(args, df_profiles):
            if args.default or (not args.no_auto
                                and df_profiles.shape[0] == 1):
                # Launch MicroDrop with most recently used (or only available) profile.
                return_code = launch_profile_row(df_profiles.iloc[0])
                if return_code == 0:
                    df_profiles.used_timestamp[0] = str(dt.datetime.now())
            else:
                # Display dialog to manage profiles or launch a profile.
                launch_dialog = LaunchDialog(df_profiles)
                launch_dialog.run()
                return_code = launch_dialog.return_code
                df_profiles = launch_dialog.df_profiles

            # Save most recent list of profiles to disk (most recently used first).
            #
            # List can be changed using dialog by:
            #  - Creating a new profile.
            #  - Importing a profile.
            #  - Updating used timestamp by launching a profile.
            df_profiles = df_profiles.astype(str)
            df_profiles.loc[df_profiles.used_timestamp == 'nan',
                            'used_timestamp'] = ''
            df_profiles.sort_values('used_timestamp',
                                    ascending=False,
                                    inplace=True)

            with args.profiles_path.open('w') as output:
                profiles_str = yaml.dump(
                    df_profiles[SAVED_COLUMNS].to_dict('records'),
                    default_flow_style=False)
                output.write(profiles_str)
            return return_code

        futures = []
        if not args.no_upgrade:
            upgrade_future = executor.submit(_auto_upgrade)
            microdrop_version_future = \
                executor.submit(_cache_latest_microdrop_version)
            futures.extend([upgrade_future, microdrop_version_future])
        launch_future = executor.submit(_launch, args, df_profiles)
        futures.append(launch_future)
        concurrent.futures.wait(futures)
        return launch_future.result()
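concurrent.futures.wait above only blocks until everything finishes; an exception raised inside the background upgrade tasks is silently discarded. A sketch of an alternative tail that logs those failures before returning, using the same futures list:

        done, _ = concurrent.futures.wait(futures)
        for future in done:
            exc = future.exception()
            if exc is not None and future is not launch_future:
                logging.warning('background task failed: %s', exc)
        return launch_future.result()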