Example #1
def fetch_item_info(session, observations, claims, verbose=False):
    """
    Fetches information about wikidata items.

    :Parameters:
        session : :class:`mwapi.Session`
            An API session to use for querying
        observations : `iterable`(`dict`)
            A collection of observations to annotate
        claims : `list` ( `str` )
            A set of property names to look up claims for
        verbose : `bool`
            Print dots and stuff

    :Returns:
        An `iterator` of observations augmented with an `autolabel` field
        containing the requested information.  Note that observations that
        can't be found will be excluded.
    """
    batches = chunkify(observations, 25)

    executor = ThreadPoolExecutor(max_workers=4)
    _fetch_item_info = build_fetch_item_info(session, claims)

    for annotated_batch in executor.map(_fetch_item_info, batches):
        for annotated_item in annotated_batch:
            yield annotated_item
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
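
The example relies on a chunkify helper that is not shown; a minimal sketch of such a batching generator, assuming it only needs to group an iterable into fixed-size lists, could look like this (the name and behaviour are assumptions, not the project's actual implementation):

from itertools import islice

def chunkify(iterable, size):
    # Hypothetical helper: group an iterable into lists of at most `size` items.
    iterator = iter(iterable)
    while True:
        batch = list(islice(iterator, size))
        if not batch:
            return
        yield batch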
Example #2
    def fetch(self, tiles):
        """
        Execute all tile requests.

        :param tiles: List of tile requests.
        """
        pool = ThreadPoolExecutor(max_workers=32)
        pool.map(self.fetch_tile, tiles, timeout=5)
        pool.shutdown()
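
Note that executor.map() returns a lazy result iterator: the timeout above is only enforced, and exceptions raised by fetch_tile only surface, when that iterator is consumed. A hedged variant that drains the results before shutting down could look like this (fetch_tile is assumed to behave as in the original class):

    def fetch(self, tiles):
        """Execute all tile requests and wait for their results."""
        pool = ThreadPoolExecutor(max_workers=32)
        try:
            # Iterating the returned iterator enforces the timeout and
            # re-raises any exception from fetch_tile.
            for _ in pool.map(self.fetch_tile, tiles, timeout=5):
                pass
        finally:
            pool.shutdown()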
Example #3
def process_seqs(input_seqs, threads=5, extract_regions=False, known_names=None):
    """Calls map_seqs_to_ref in a multithreaded way."""

    if extract_regions:
        region_dict = load_region()
    else:
        region_dict = defaultdict(list)

    chunksize = 50
    iter_seqs = iter(input_seqs)
    chunk_iterable = yield_chunks(iter_seqs, chunksize)

    if threads > 1:
        logging.warning('Started ThreadPool with %i workers' % threads)
        ex = ThreadPoolExecutor(max_workers=threads)
        res_iter = chain.from_iterable(ex.map(map_seqs_to_ref, chunk_iterable))
    else:
        logging.warning('Running with NO THREADS!')
        res_iter = chain.from_iterable(imap(map_seqs_to_ref, chunk_iterable))

    name_count = 0
    prev_name = None
    for row in res_iter:
        if row['Name'] != prev_name:
            prev_name = row['Name']
            name_count += 1
        yield row
        for region_row in region_dict[row['RegionName']]:
            nrow = region_linker(deepcopy(row), region_row)
            if nrow:
                yield nrow
Example #4
def handler(event, context):
    logger.info("Start!")

    executor = ThreadPoolExecutor(max_workers=100)

    cal = Sum()

    queue_url = event['queueUrl']
    message_count = event['messageCount']

    queue = sqs.Queue(queue_url)
    num_of_calls = message_count // batch_count

    queues = []
    for i in range(num_of_calls):
        queues.append(queue)

    message_count = 0

    responses = executor.map(one_request, queues)
    for response in responses:
        message_count += len(response)
        for msg in response:
            cal.add(msg)

    logger.info("Receive API count: {}".format(num_of_calls))
    logger.info("Fetched messages: {}".format(message_count))

    executor.shutdown()
Example #5
def play(av, n):
    proxy_list = get_proxy(n)
    executor = ThreadPoolExecutor(max_workers=n)
    play_video_av = partial(play_video_1, av=av)

    for data in executor.map(play_video_av, proxy_list):
        print("in main: 1 success".format(data))
Example #6
def main():
    db.fixkeys(key_utils.to_ipv6)
    parser = argparse.ArgumentParser(description='Submit nodes and links to fc00')
    parser.add_argument('-v', '--verbose', help='increase output verbosity',
                        dest='verbose', action='store_true')
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    con = connect()

    nodes = dump_node_store(con)
    edges = {}

    get_peer_queue = queue.Queue(0)
    result_queue = queue.Queue(0)
    e = ThreadPoolExecutor(max_workers=4)
    def args():
        for ip,node in nodes.items():
            yield ip,keyFromAddr(node['addr']),node['path'],node['version']
    args = zip(*args())
    dbnodes = {}
    for peers, node_id, ip in e.map(get_peers_derp, *args):
        get_edges_for_peers(edges, peers, node_id)
        addpeersto(dbnodes,node_id,ip,peers)

        for ip, id in peers:
            addpeersto(dbnodes,id,ip)
    print('otay!')
    send_graph(dbnodes, edges)
    sys.exit(0)
Example #7
def download_all_comics(output_directory, file_prefix):
    """Download every comic getting the latest comic id from the RSS feed."""
    max_comic_id = 1
    for (comic_id, _) in get_latest_comics_from_feed():
        comic_id = int(comic_id)
        if max_comic_id < comic_id:
            max_comic_id = comic_id

    comic_id_list = list(range(1, max_comic_id + 1))
    if file_prefix:
        file_name_list = prefix(file_prefix, comic_id_list)
    else:
        file_name_list = comic_id_list
    # Kept a default of 10. We don't want to leech off XKCD.com
    executor = ThreadPoolExecutor(max_workers=10)
    executor.map(download_single_comic, comic_id_list, repeat(output_directory), file_name_list)
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("shader",
                        nargs='*',
                        default=['shaders'],
                        metavar="<shader_file | shader dir>",
                        help="A shader file or directory containing shader "
                             "files. Defaults to 'shaders/'")
    args = parser.parse_args()

    os.environ["shader_precompile"] = "true"
    os.environ["allow_glsl_extension_directive_midshader"] = "true"
    if "INTEL_DEBUG" in os.environ:
        print("Warning: INTEL_DEBUG environment variable set!", file=sys.stderr)
        os.environ["INTEL_DEBUG"] += ",vs,gs,fs"
    else:
        os.environ["INTEL_DEBUG"] = "vs,gs,fs"

    try:
        os.stat("bin/glslparsertest")
    except OSError:
        print("./bin must be a symlink to a built piglit bin directory")
        sys.exit(1)

    runtimebefore = time.time()

    filenames = process_directories(args.shader)

    executor = ThreadPoolExecutor(cpu_count())
    for t in executor.map(run_test, filenames):
        sys.stdout.write(t)

    runtime = time.time() - runtimebefore
    print("shader-db run completed in {:.1f} secs".format(runtime))
Example #9
def set_task(translator, translit=False):
    """
    Task Setter Coroutine

    End point destination coroutine of a purely consumer type.
    Delegates Text IO to the `write_stream` function.

    :param translator: Translator
    :type translator: Function

    :param translit: Transliteration Switch
    :type translit: Boolean
    """
    # Initialize Task Queue
    task = str()
    queue = list()

    # Function Partial
    output = "translit" if translit else "trans"
    stream = partial(write_stream, output=output)
    workers = ThreadPoolExecutor(max_workers=8)

    try:
        while True:

            task = yield
            queue.append(task)

    except GeneratorExit:
        list(map(stream, workers.map(translator, queue)))
Example #10
    def run(self):
        """
        Start the check run, the run is made of three stages:
        1. Preparation - get current date, clear counters, prepare a queue
            of projects
        2. Execution - process every project in the queue
        3. Finalize - create `db.Run` entry with counters and time
        """
        # 1. Preparation phase
        # We must convert it to datetime for comparison with sqlalchemy TIMESTAMP column
        session = db.Session()
        time = arrow.utcnow().datetime
        self.clear_counters()
        queue = self.construct_queue(time)
        total_count = len(queue)

        if not queue:
            return

        # 2. Execution
        _log.info(
            "Starting check on {} for total of {} projects".format(time, total_count)
        )
        pool_size = config.get("CRON_POOL", 10)
        pool = ThreadPoolExecutor(pool_size)
        # Drain the iterator so every project is processed (and worker
        # exceptions surface) before the results are summarised below.
        list(pool.map(self.update_project, queue))
        pool.shutdown()

        # 3. Finalize
        _log.info(
            "Check done. Checked ({}): error ({}), success ({}), fail ({})".format(
                total_count,
                self.error_counter,
                self.success_counter,
                self.error_counter,
            )
        )

        run = db.Run(
            created_on=time,
            total_count=total_count,
            error_count=self.error_counter,
            ratelimit_count=self.ratelimit_counter,
            success_count=self.success_counter,
        )
        session.add(run)
        session.commit()
Example #11
def poem_from_template(template, db, sound_cache=None):
    executor = ThreadPoolExecutor(4)
    letter_sound_map = map_letters_to_sounds(db, template, sound_cache)
    process_tmpl_line = threaded(partial(extract_ruleset, db, letter_sound_map),
                                 partial(ruleset_to_line, db))
    poem_lines = executor.map(process_tmpl_line, template)
    executor.shutdown()
    return list(poem_lines)
Example #12
def __get_reports_for_domains_collection(domains, key, threads_count=1):
    executor = ThreadPoolExecutor(threads_count)
    reports_lists_iterator = executor.map(lambda chunk: __get_for_domains(chunk, key, RETRY_COUNT),
                                          __split(domains, CHUNK_SIZE))

    maps_list = list(reports_lists_iterator)

    return __merge_dicts(*maps_list)
Example #13
 def test_adding_next_tick_from_another_thread(self):
     # The test is probabilistic in nature - there's a slight chance it'll give a false negative
     with LoopAndGroup(quit_after=15) as ctx:
         n = 1000
         func = _make_invocation_counter(ctx.io_loop, stop_after=n)
         tpe = ThreadPoolExecutor(n)
         list(tpe.map(ctx.group.add_next_tick_callback, repeat(func, n)))
     assert n == func.count()
Example #14
 def run(self):
   while not self.next_queue.empty():
     queue = self.next_queue
     self.next_queue = Queue()
     targets = []
     while not queue.empty():
       targets.append(queue.get())
     executor = ThreadPoolExecutor(self.workers)
     futures = executor.map(self.check, targets)
     for future in futures:
       self.log(*future)
Example #15
def poem_from_template(template, db: Database, corpus_id, sound_cache=None):
    engine = get_engine(db)
    conn = engine.connect()
    executor = ThreadPoolExecutor(4)
    letter_sound_map = map_letters_to_sounds(conn, corpus_id, template, sound_cache)
    process_tmpl_line = threaded(partial(extract_ruleset, conn, corpus_id, letter_sound_map),
                                 partial(ruleset_to_line, conn, corpus_id))
    poem_lines = executor.map(process_tmpl_line, template)
    executor.shutdown()

    return list(poem_lines)
Example #16
    def test_many_individual(self):
        def method(_):
            r = requests.get(ROOT_URL)
            data = r.json()
            self.assertTrue(data)
            print('GG')

            return data

        with self.assertTimeTakenLessThan(80):
            pool = ThreadPoolExecutor(max_workers=10)
            list(pool.map(method, range(50)))
Example #17
    def resolve_mirrors(self):
        mirrors = self.get_mirrors_from_database()
        threadpool = TPE(4)

        def not_a_lambda_function(mirror):
            mirror_hostname = parse.urlparse(mirror['mirror']).hostname
            address_list = []
            for addrinfo in socket.getaddrinfo(mirror_hostname,80):
                address_list.append(addrinfo[-1][0])
            return (mirror['ID'], address_list)

        results = threadpool.map(not_a_lambda_function,mirrors)
        return list(results)
Example #18
def main(args):
    # Find out the configs and their number of runs
    configs = []
    cmd = ["opp_run", "-a", args.config, "--repeat=1"]
    #                do not use repeat of ini file ^
    configs_output = subprocess.check_output(cmd)
    for line in configs_output.splitlines():
        m = re.search("Config (.*): ([0-9]*)",str(line))
        if m:
            config = m.group(1)
            runs = int(m.group(2))
            if config != "General":
                configs.append({'config':config, 'runs': runs})
    print(configs)

    # Build up commands
    cmds = []
    for repetition in range(0,int(args.repetitions)):
        for config in configs:
            for run in range(0,config['runs']):
                name = str(config['config'])+"-"+str(run)+"-"+str(repetition)
                cmds.append(["opp_run",
                       "--repeat=1",
                       "-u","Cmdenv",
                       "-r",str(run),
                       "--output-scalar-file="+args.results+"/"+name+".sca",
                       "--output-vector-file="+args.results+"/"+name+".vec",
                       "--seed-set="+str(repetition),
                       "-c",config['config'],
                       "-n", INET_DSME+"simulations:"+INET_DSME+"src:"+args.inet+"/examples:"+args.inet+"/src",
                       "-l",args.inet+"/src/INET",
                       "-l",INET_DSME+"src/inet-dsme",
                       args.config])

    executor = ThreadPoolExecutor(max_workers=int(args.jobs))
    executor.map(runcmd,cmds)
Example #19
def seq_align_to_ref(input_seqs, ref_seq, max_workers=None):
    """Aligns all sequences to a reference.
    """

    check_seqs = [[(name, seq), ("__ref__", ref_seq)] for name, seq in input_seqs]
    if max_workers > 1:
        executor = ThreadPoolExecutor(max_workers=max_workers)
        res = executor.map(call_muscle, check_seqs)
    else:
        res = imap(call_muscle, check_seqs)

    for alignment in res:
        adict = dict(alignment)
        name = [key for key in adict.keys() if key != "__ref__"][0]
        ref_align = seq_map_to_ref(adict[name], adict["__ref__"])
        yield name, ref_align
Example #20
class OpcacheManager(object):
    # Global timeout for all objects
    TIMEOUT = 5

    def __init__(self, admin_port):
        self.admin_port = admin_port
        self.threadpool = ThreadPoolExecutor(max_workers=10)

    def _invalidate_host(self, host, filename):
        url = "http://{hostname}:{port}/opcache-free".format(
            hostname=host, port=self.admin_port)
        if filename is not None:
            params = {'file': filename}
        else:
            params = {}
        try:
            result = requests.get(url, params=params, timeout=self.TIMEOUT)
            result.raise_for_status()
            return (True, None)
        except requests.exceptions.HTTPError as e:
            return (False,
                    "Response returned error {}".format(str(e)))
        except requests.exceptions.Timeout:
            return (False, 'A timeout happened before a response was received')
        except Exception as e:
            return (False, str(e))

    def invalidate(self, hosts, filename):
        """Invalidates files/directories (or all) opcache."""
        def invalidate_closure(host):
            return (host, self._invalidate_host(host, filename))

        results = self.threadpool.map(invalidate_closure, hosts)
        # Collect all failed results and return them to the caller
        return {host: result[1] for host, result in results if not result[0]}

    def invalidate_all(self, config, filename=None):
        target_groups = targets.DirectDshTargetList('mw_web_clusters', config)
        # Fallback if nothing was defined.
        if not target_groups.all:
            target_groups.primary_key = 'dsh_targets'
            target_groups.deploy_groups = None
        failed = {}
        for _, group in target_groups.groups.items():
            failed.update(self.invalidate(group.targets, filename))
        return failed
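
A brief usage sketch of OpcacheManager; the port, hostnames, and file path below are made-up values for illustration only:

manager = OpcacheManager(admin_port=9181)
# Invalidate one file on two app servers and report any failures.
failed = manager.invalidate(
    ['mw1001.example.org', 'mw1002.example.org'],
    '/srv/mediawiki/wmf-config/CommonSettings.php')
for host, error in failed.items():
    print('{}: {}'.format(host, error))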
Example #21
    class parallel_map(collections.Iterable):

        def __init__(self, pool_size, function, *iterables):
            if not isinstance(pool_size, numbers.Integral):
                raise TypeError('pool_size must be an integer, not ' +
                                repr(pool_size))
            elif not callable(function):
                raise TypeError('function must be callable, not ' +
                                repr(function))
            elif not iterables:
                raise TypeError('missing iterable')
            self.pool = ThreadPoolExecutor(pool_size)
            self.results = self.pool.map(function, *iterables)

        def __iter__(self):
            for value in self.results:
                yield value
            self.pool.shutdown()
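
A hedged usage sketch of the class above, treating it as a parallel drop-in for the built-in map; the worker function and inputs are illustrative:

def fetch_length(url):
    # placeholder worker; any single-argument callable works here (illustrative)
    return len(url)

for value in parallel_map(4, fetch_length, ['https://example.org', 'https://example.net']):
    print(value)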
Example #22
def batch_loader(batch_list,
                 ignore_photos=False,
                 photo_size=(224, 224),
                 pad=0):
    # load all of photos using thread pool.
    photo_paths = [
        path for sample in batch_list for view in sample[3] for path in view
    ]
    pool = ThreadPoolExecutor()
    results = pool.map(lambda x: get_image(x, photo_size), photo_paths)
    pool.shutdown()

    data = [list() for i in batch_list[0]]
    for sample in batch_list:
        for i, val in enumerate(sample):
            if i in (0, 1, 2):  # reviews val=[sent_id1, sent_id2, ...]
                data[i].append(val)
            if not ignore_photos and i == 3:  # photos
                data[i].append([[next(results) for path in ps] for ps in val])
            if i == 4:  # ratings
                data[i].append(val)

    # pad sentences Ru and Ri
    max_count, max_len = 0, 0
    for ru, ri in zip(data[0], data[1]):
        max_count = max(max_count, max(len(ru), len(ri)))
        max_len = max(
            max_len, max(max([len(i) for i in ru]), max([len(i) for i in ri])))
    lengths = [0, 0, 0]
    data[0], lengths[0] = pad_reviews(data[0], max_count, max_len, pad=pad)
    data[1], lengths[1] = pad_reviews(data[1], max_count, max_len, pad=pad)
    data[2], lengths[2] = pad_reviews(data[2], pad=pad)

    return (
        torch.LongTensor(data[0]),
        torch.LongTensor(data[1]),
        torch.LongTensor(data[2]),
        torch.LongTensor(lengths[0]),
        torch.LongTensor(lengths[1]),
        torch.LongTensor(lengths[2]),
        torch.Tensor(data[3]),
        torch.Tensor(data[4]),
    )
Example #23
def main(name):
    start = time.time()

    # Displays all the information related with the query
    info = getBooks(name)  # Gets the books
    totalBooks = info.get("totalItems")
    books = info.get("items")
    # displays books that contains the query
    click.echo(f'TOTAL OF BOOKS FOUND: {totalBooks}')
    click.echo("BOOKS FOUND: ")

    book_list = [book.get('volumeInfo')
                 for book in books]  # volumeInfo for each book
    executor = ThreadPoolExecutor(100)
    result = list(executor.map(parseInfo, book_list))

    for book in result:
        click.echo(book)
        click.echo("****************************************")
Example #24
def test_series_reductions_concurrency(method):
    from concurrent.futures import ThreadPoolExecutor

    e = ThreadPoolExecutor(10)

    np.random.seed(0)
    srs = [Series(np.random.random(10000)) for _ in range(1)]

    def call_test(sr):
        fn = getattr(sr, method)
        if method in ["std", "var"]:
            return fn(ddof=1)
        else:
            return fn()

    def f(sr):
        return call_test(sr + 1)

    list(e.map(f, srs * 50))
Example #25
def multi_get_md5(file_list):
    """
    get file_list md5
    :param file_list:
    :return:
    """

    pool = ThreadPoolExecutor(max_workers=5)
    results = pool.map(get_file_md5, file_list)
    pool.shutdown()

    files_md5 = {}
    for rtn in results:
        if not rtn:
            return False
        else:
            files_md5.update(rtn)

    return files_md5
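
The per-file worker get_file_md5 is not shown; a minimal sketch, assuming it should return a {path: hexdigest} mapping (or a falsy value on failure) as the loop above expects:

import hashlib

def get_file_md5(file_path):
    # Hypothetical helper: hash one file and report it as {path: hexdigest}.
    try:
        md5 = hashlib.md5()
        with open(file_path, 'rb') as handle:
            for block in iter(lambda: handle.read(1 << 20), b''):
                md5.update(block)
        return {file_path: md5.hexdigest()}
    except OSError:
        return False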
Example #26
    def _upload(self, src, bucket, key, extra_args):
        reader = FileChunkReader(src)
        self._total_size = reader.get_size()
        chunks = reader.get_chunks()
        self._parts_number = len(chunks)

        #  upload small file by using put_object.
        if self._parts_number == 1:
            chunk = chunks[0]
            LOGGER.info("%s is not need using MultipartUpload." % key)
            self.client.put_object(Bucket=bucket, Key=key,
                                   Body=chunk.read(), **extra_args)

            self._add_finished_size(chunk.size)
            if self._progress:
                self._callback(self._total_size, self._finished_size)
            LOGGER.info("upload %s finished" % key)
            return True

        response = self.client.create_multipart_upload(
            Bucket=bucket, Key=key, **extra_args)
        upload_id = response['UploadId']

        executor = ThreadPoolExecutor(max_workers=self._threads)
        try:
            do_upload = self._upload_one_part(bucket, key,
                                              upload_id, extra_args)

            # multi upload process: collect the uploaded part info as a list.
            parts = list(executor.map(do_upload, chunks))
            self.client.complete_multipart_upload(
                Bucket=bucket, Key=key, UploadId=upload_id,
                MultipartUpload={'Parts': parts})
            LOGGER.info("upload %s finished" % key)

        except (KeyboardInterrupt, Exception) as error:
            executor.shutdown()
            LOGGER.error("abort upload %s %s %s" % (src, bucket, key))
            self.client.abort_multipart_upload(
                Bucket=bucket, Key=key, UploadId=upload_id)
            raise(error)
        return True
Example #27
    def verify(self, validator, handler, repeat=1, concurrency=10, sleep=1):
        proxy_count = len(self._proxylist)
        progress_count = 0
        def run(proxy):
            time.sleep(sleep)
            test_logs = list([validator.verify(proxy) for _ in range(repeat)])
            data = dict(proxy=proxy, test_logs=test_logs)
            handler.handle(data)
            return proxy

        executor = ThreadPoolExecutor(max_workers=concurrency)
        for proxy in executor.map(run, self._proxylist):
            progress_count += 1
            progress = round(progress_count / proxy_count * 100, 2)
            
            print(f'Verified [ {progress}% | {progress_count}/{proxy_count} ] {proxy.proxy_url}')
            if self._context and self._context.logger:
                self._context.logger.info(f'ProxyPool: Verified [ {progress}% | {progress_count}/{proxy_count} ] {proxy.proxy_url}.')

        handler.close()
Example #28
def get_all_logs(nconfigs,
                 experiment_id,
                 lock=threading.Lock(),
                 split_by_job_id=False):  # noqa: B008
    with lock:
        print(f"Collecting metrics for {experiment_id}")
    split = nconfigs[experiment_id]
    # The first key in `xp_logs` is used to differentiate jobs when `split_by_job_id` is True.
    xp_logs = defaultdict(lambda: defaultdict(dict))
    for split_key, runs in split.items():
        pool = ThreadPoolExecutor(min(MAX_RUN_LOADERS, len(runs)))
        log_loader = functools.partial(load_logs,
                                       split_by_job_id=split_by_job_id)
        metrics = pool.map(log_loader, runs)
        for metrics_per_job in metrics:
            for job_id, r, m in metrics_per_job:
                xp_logs[job_id][split_key][r] = m
    # Convert to regular dict.
    xp_logs_dict = {k: dict(v) for k, v in xp_logs.items()}
    return experiment_id, xp_logs_dict
Example #29
def mulAcToken(isHost, limit=''):
    print(time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(time.time())))
    logger = Logger('AccessToken')
    account = MySqlDao()
    # res = account.rows("select * from nikeaccount where id >50 and id <= 100")
    if limit != '':
        res = account.rows("select * from nikeaccount limit " + limit)
    else:
        res = account.rows("select * from nikeaccount")
    print("[nkBot]>get " + str(len(res)) + " accounts Access Token")
    data = []
    for re in res:
        if re[4] and re[8]:
            data.append((re[0], re[4], re[8], isHost))
    if len(data):
        pool = ThreadPoolExecutor()
        res = list(pool.map(accessToken, data))
        logger.write(res)
    else:
        pass
Example #30
    def run(self, io_threads, timeout, chunk_size=10000):
        start_time = time()
        pool = ThreadPoolExecutor(max_workers=io_threads)
        gen = (line.strip() for line in sys.stdin)
        i = 0
        for chunk in self._chunker(gen, chunk_size):
            for obj in pool.map(self.do_io, chunk):
                i += 1
                if obj[1] is not None:
                    yield self.featurize(*obj)

                    if not i % self.print_interval:
                        print(json.dumps({
                            "i": i,
                            "time": time() - start_time,
                        }), file=sys.stderr)

        self.close()
Example #31
    def _download(self, bucket, key, dest, extra_args):
        write_file = "%s/%s" % (dest, key.split('/')[-1]) \
                     if os.path.isdir(dest) else dest
        file_size = int(self.client.head_object(
                Bucket=bucket, Key=key, **extra_args)['ContentLength'])
        self._total_size = file_size
        writer = FileChunkWriter(write_file, file_size)
        chunks = writer.get_chunks()

        executor = ThreadPoolExecutor(max_workers=self._threads)
        do_download = self._download_one_part(bucket, key, extra_args)
        try:
            # Drain the iterator so every part finishes (and exceptions surface)
            # before the write is committed.
            list(executor.map(do_download, chunks))
            writer.commit_write()
        except (KeyboardInterrupt, Exception) as error:
            writer.abort_write()
            executor.shutdown()
            LOGGER.error("abort download %s %s %s" % (bucket, key, dest))
            raise(error)
        return True
Example #32
def main():
    a = read_html("Bitcoin - The New York Times.html")
    links = a.find_all("a", {"class": "story-link"})
    links = [href.attrs["href"] for href in links]
    print(len(links))
    # for link in links:
    #     a = parse_article_page(link)
    #     print(a['name'])
    #     print(a["text"])
    #     with open("new_york_articles.json", "a") as myfile:
    #         myfile.write(json.dumps(a))
    pool = Pool(100)
    results = pool.map(parse_article_page, links)
    results = [res for res in results]
    result = json.dumps(results)
    with open("new_york_articles.json", "a") as myfile:
        myfile.write(result)
    for result in results:
        print(result["name"])
        print(result["text"])
Example #33
    def get_results(self):
        """
        Maps the resources to be downloaded, parses them
        and returns a dictionary containing the results
        """
        # Renew cookie
        self.renew_cookie()

        # Threads Powah!
        pool = ThreadPoolExecutor(max_workers=2)

        results = {
            res_name: getattr(Parser(res_data), res_name)()
            for (res_name, res_data)
            in pool.map(self.resource_fetch, self.resources)
        }

        pool.shutdown()

        return results
Example #34
def parallel_blast(targets, reference, num_cpu=4):
    blastdb_pth = os.path.join(tmp_dir, 'blast-%s' % GoAspect(ASPECT))
    records = [SeqRecord(Seq(seq), id) for id, seq in reference.items()]
    SeqIO.write(records, open(blastdb_pth, 'w+'), "fasta")
    os.system("makeblastdb -in %s -dbtype prot" % blastdb_pth)

    predictions = dict()
    e = ThreadPoolExecutor(num_cpu)

    def _parallel_blast_helper(s):
        return s[0], _blast(SeqRecord(Seq(s[1]), s[0]), reference, topn=None, choose_max_prob=True)

    pbar = tqdm(range(len(targets)), desc="blast2go processed")

    for tgtid, preds in e.map(_parallel_blast_helper, targets.items()):
        predictions[tgtid] = preds
        pbar.update(1)

    pbar.close()
    return predictions
Example #35
def deploy(subserver_list, deploy_type, is_restart_server, user_name,
           deploy_version, operation_type):

    worker_num = len(subserver_list[0])
    executor = ThreadPoolExecutor(max_workers=worker_num)

    # subserver_list format: [[1,2,3],[4,5,6],[7,8]]
    for item in subserver_list:
        if deploy_type in ['deployall', 'deploypkg', 'deploycfg']:
            cmd_list = ['prepare', 'backup', 'stop', deploy_type, 'start', 'check'] \
                if is_restart_server else ['prepare', 'backup', deploy_type, 'check']
        elif deploy_type == 'rollback':
            cmd_list = ['stop', 'rollback', 'start', 'check'] \
                if is_restart_server else ['rollback', 'check']
        elif deploy_type == 'stop':
            cmd_list = ['stop']
        elif deploy_type == 'start':
            cmd_list = ['start', 'check']
        elif deploy_type == 'restart':
            cmd_list = ['stop', 'start', 'check']
        else:
            return False
        cmd_len = len(cmd_list)
        for index, cmd in enumerate(cmd_list):
            # Compute the completion percentage for each command based on the total number of commands
            percent_value = "%.0f%%" % ((index + 1) / cmd_len * 100)
            # Multithreaded version; the work is IO-bound, so threading fits well

            server_id = []
            for item_id in item:
                server_id.append(item_id)
            server_len = len(server_id)
            for data in executor.map(cmd_run, server_id, [cmd] * server_len,
                                     [user_name] * server_len,
                                     [percent_value] * server_len,
                                     [deploy_version] * server_len,
                                     [operation_type] * server_len):
                if not data:
                    return False

    return True
Example #36
class ThreadPool:
    def __init__(self, max_workers=10):
        self.app = current_app
        self._executor = None
        self.futures = FutureCollection()
        self._executor = ThreadPoolExecutor(max_workers=max_workers)

    def submit(self, func, *args, **kwargs):
        func = self._prepare_func(func)
        return self._executor.submit(func, *args, **kwargs)

    def submit_stored(self, future_key, func, *args, **kwargs):
        future = self.submit(func, *args, **kwargs)
        self.futures.add(future_key, future)
        return future

    def shutdown(self):
        return self._executor.shutdown()

    def map(self, func, *iterables, **kwargs):
        func = self._prepare_func(func)
        return self._executor.map(func, *iterables, **kwargs)

    def job(self, func):
        return ThreadPoolJob(executor=self, func=func)

    def _copy_current_app_context(self, func):
        app_context = _app_ctx_stack.top

        def wrapper(*args, **kwargs):
            with app_context:
                return func(*args, **kwargs)

        return wrapper

    def _prepare_func(self, func):
        if isinstance(self._executor, concurrent.futures.ThreadPoolExecutor):
            if _request_ctx_stack.top is not None:
                func = copy_current_request_context(func)
            func = self._copy_current_app_context(func)
        return func
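
A brief usage sketch, assuming the ThreadPool class above is importable and a Flask app is available; the route, build_report task, key, and pool size below are illustrative assumptions:

from flask import Flask

app = Flask(__name__)
pool = ThreadPool(max_workers=4)  # the wrapper class defined above

def build_report(user_id):
    # placeholder for a slow, IO-bound task (illustrative)
    return {'user': user_id}

@app.route('/report/<int:user_id>')
def start_report(user_id):
    # The wrapped callable runs with the current app and request context
    # copied in, and the future is stored under a key for later retrieval.
    pool.submit_stored('report-%d' % user_id, build_report, user_id)
    return 'started', 202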
Example #37
    def run(self):
        """Fetch networks information from ASRank and push to wikibase. """

        self.wh.login()  # Login once for all threads
        pool = ThreadPoolExecutor()
        has_next = True
        i = 0
        while has_next:
            req = requests.get(URL_API + f'?offset={i}')
            if req.status_code != 200:
                sys.exit('Error while fetching data from API')

            ranking = json.loads(req.text)['data']['asns']
            has_next = ranking['pageInfo']['hasNextPage']

            for res in pool.map(self.update_net, ranking['edges']):
                sys.stderr.write(
                    f'\rProcessing... {i+1}/{ranking["totalCount"]}')
                i += 1

        pool.shutdown()
Example #38
def get_data(sector):
    query = "SELECT top 1000 * FROM obsPointing WHERE obs_collection='TESS' AND dataproduct_type='image' " \
            "AND sequence_number={}".format(sector)
    df = query_db(CAOM_OPS, query)

    df['coords'] = df.apply(lambda x: parse_s_region(x['s_region']), axis=1)

    # Generate MOC
    start_time = time.time()
    pool = ThreadPoolExecutor(max_workers=4)
    results = list(pool.map(get_polygon_moc, [row for _, row in df.iterrows()]))
    end_time = time.time()
    print('Total time : {} seconds'.format(end_time - start_time))

    # Union of MOCs
    start_time = time.time()
    moc = MOC.union(*results)
    end_time = time.time()
    print('Total time : {} seconds'.format(end_time - start_time))

    return moc
Example #39
 def map(self,
         func: Callable,
         iterable: Iterable,
         timeout: float=None,
         callback_timeout: Callable=None,
         daemon: bool = True
         ) -> Iterable:
     """
     :param func: the function to execute
     :param iterable: an iterable of function arguments
     :param timeout: after this time, the process executing the function
             will be killed if it did not finish
     :param callback_timeout: this function will be called, if the task
             times out. It gets the same arguments as the original function
     :param daemon: define the child process as daemon
     """
     executor = ThreadPoolExecutor(max_workers=self.processes)
     params = ({'func': func, 'fn_args': p_args, "p_kwargs": {},
                'timeout': timeout, 'callback_timeout': callback_timeout,
                'daemon': daemon} for p_args in iterable)
     return executor.map(self._submit_unpack_kwargs, params)
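
The method above belongs to a larger wrapper class that is not shown; a hedged usage sketch, assuming the wrapper (called TimeoutPool here purely for illustration) exposes self.processes and a _submit_unpack_kwargs worker that returns the wrapped function's result:

def crawl(url):
    # placeholder for a call that may hang (illustrative)
    return url.upper()

runner = TimeoutPool(processes=4)  # hypothetical name for the enclosing class
results = runner.map(crawl,
                     [('https://example.org',), ('https://example.net',)],
                     timeout=5,
                     callback_timeout=lambda url: print('timed out:', url))
print(list(results))  # consuming the iterator drives the pool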
Example #40
def search(word="たこ焼き"):
    def foo(area):
        fuga = []
        for i in range(1, 100000):
            try:
                with request.urlopen(f"https://api.gnavi.co.jp/RestSearchAPI/v3/?keyid={API_KEY}&hit_per_page=100&areacode_l={area}&offset_page={i}&freeword={parse.quote(word)}&freeword_condition=2") as res:
                    # ','.join(word)
                    html = res.read().decode("utf-8")
            except Exception as e:
                break
            a = json.loads(html)
            for x in a["rest"]:
                if x["latitude"]=="" or x["longitude"]=="": continue
                fuga.append({"id":x["id"], "url": x["url"], "latitude": x["latitude"], "longitude": x["longitude"]})
        return fuga
    pool = ThreadPoolExecutor(max_workers=32)
    with request.urlopen(f"https://api.gnavi.co.jp/master/GAreaLargeSearchAPI/v3/?keyid={API_KEY}") as res:
        html = res.read().decode("utf-8")
    result = pool.map(foo, [x["areacode_l"] for x in json.loads(html)["garea_large"]])
    hoge = list(itertools.chain.from_iterable(result))
    return hoge
Example #41
    def use_threadpool_to_get_full_data(self):

        executor = ThreadPoolExecutor(max_workers=100)

        for i in range(0, self.rowcount):
            self.row_order = i

            for data in executor.map(
                    self.get_singe_row_data_by_response_length,
                    self.coname_list):
                self.columndata.append(data)

            print(self.columndata)

            self.full_data_dict[self.row_order] = self.columndata

            self.columndata = []

        print(self.full_data_dict)

        return self.full_data_dict
Example #42
class InputProcessor:
    def __init__(self, max_workers=5):
        # data pre-processing
        self.pool = ThreadPoolExecutor(max_workers=max_workers)

    @staticmethod
    def split_content(text):

        # lambda
        split_w = lambda t, s: [i for i in t.split(s) if len(i) != 0]

        ft = [text]
        for stop in stop_words:
            for i in range(0, len(ft)):
                # split len over max len (50)
                if len(ft[i]) <= (52 - 2):
                    ft[i] = [ft[i]]
                else:
                    ft[i] = split_w(ft[i], stop)
            if len(ft) > 0:
                ft = list(np.concatenate(ft))
        return ft

    def do(self, contents):

        # split content
        list_cons = list(self.pool.map(self.split_content, contents))

        # convert to dataframe, [content, content_id]
        data = DataFrame([], columns=('text', 'id'))
        for i in range(0, len(list_cons)):
            item = list_cons[i]
            if item is None:
                data.loc[len(data)] = ['', i]
            else:
                for it in item:
                    data.loc[len(data)] = [
                        it if it is not None and len(it) != 0 else '', i
                    ]
        return data
Example #43
def yearly_absolute_data(pool: ThreadPoolExecutor,
                         session: Session,
                         orion_cb: ContextBroker,
                         inds: Mapping[str, indicators.Indicator],
                         year: Optional[int] = None):
    """Get absolute data for the given indicators and years"""
    granularities: List[Tuple[indicators.Indicator, str,
                              Optional[int]]] = list()
    for ind in inds.values():
        for geo_grain in ind.geographical_granularities.values():
            granularities.append((ind, geo_grain, year))

    batches = [
        granularities[base:(base + BATCH_SIZE)]
        for base in range(0, len(granularities), BATCH_SIZE)
    ]
    for batch in batches:
        logging.info(
            "Batching indicators %s",
            ", ".join("%s:%s" % (item[0].code, item[1]) for item in batch))
        orion_cb.batch(session,
                       rotate(pool.map((lambda g: ind_as_kpi(*g)), batch)))
Example #44
def to_pngs(texes,
            texes_outfile,
            dpi=200,
            threads=2,
            check=True,
            only_dvi=False,
            replace=False):
    '''
    texes are collection of (tex, tex_outfile)
    '''

    pool = ThreadPoolExecutor(max_workers=(max(len(texes), threads)))
    map_iter = pool.map(
        functools.partial(to_png,
                          check=check,
                          only_dvi=only_dvi,
                          replace=replace),
        texes,
        texes_outfile,
    )
    results = [result for result in map_iter]
    return results
Example #45
    def enumerate_definition_descriptors(self):
        executor = ThreadPoolExecutor(max_workers=16)

        # scan github repositories
        for github_org_cfg in self.job_mapping.github_organisations():
            github_cfg = self.cfg_set.github(github_org_cfg.github_cfg_name())
            github_org_name = github_org_cfg.org_name()
            logger.info(
                f'scanning github organisation {github_org_name}'
                f' with github_cfg {github_cfg.name()}'
            )

            github_api = ccc.github.github_api(github_cfg)
            github_org = github_api.organization(github_org_name)

            if self.job_mapping.secret_cfg():
                secret_cfg = self.cfg_set.secret(self.job_mapping.secret_cfg())
            else:
                secret_cfg = None

            scan_repository_for_definitions = functools.partial(
                self._scan_repository_for_definitions,
                github_cfg=github_cfg,
                org_name=github_org_name,
                secret_cfg=secret_cfg,
                job_mapping=self.job_mapping,
            )

            matching_repositories = (
                repo for repo in github_org.repositories()
                if github_org_cfg.repository_matches(repo.name)
                   and (not self.repository_filter or self.repository_filter(repo))
            )

            for definition_descriptors in executor.map(
                scan_repository_for_definitions,
                matching_repositories,
            ):
                yield from definition_descriptors
Example #46
def convert2tfrecords(dataset_file_path, file_suffix, tfrecord_file_path,
                      n_workers):
    filenames = os.listdir(dataset_file_path)
    if file_suffix is None:
        filenames = [f for f in list(filenames)]
    else:
        filenames = [f for f in list(filenames) if f.endswith(file_suffix)]

    def processor(filepath, filename, tfrecord_filename):
        token_dicts = None
        data_field_list = []
        data_field_list.append(
            DataSchema(name='query',
                       processor='to_np',
                       type=tf.int32,
                       dtype='int32',
                       shape=(None, ),
                       is_with_len=True))
        label_field = DataSchema(name='label',
                                 processor='to_np',
                                 type=tf.float32,
                                 dtype='float32',
                                 shape=(1, ),
                                 is_with_len=False)
        parser = TextlineParser(token_dicts, data_field_list, label_field)
        generator = TFDataset(parser=parser,
                              file_path=filepath,
                              file_suffix=filename)
        generator.to_tfrecords(tfrecord_filename)
        return tfrecord_filename

    task_param_list = [
        (dataset_file_path, filename,
         tfrecord_file_path + '/' + str(i) + '.tfrecord')
        for i, filename in enumerate(filenames)
    ]
    pool = ThreadPoolExecutor(max_workers=n_workers)
    # processor takes three positional arguments, so unpack each parameter tuple.
    for result in pool.map(lambda params: processor(*params), task_param_list):
        print(result, 'finish')
Example #47
    def load(self, N=None, random_order=True):
        """
        Load all or a subset of stored SleepStudy objects
        Data is loaded using a thread pool with one thread per SleepStudy.

        Args:
            N:              Number of SleepStudy objects to load. Defaults to
                            loading all.
            random_order:   Randomly select which of the stored objects to load
                            rather than starting from the beginning. Only has
                            an effect with N != None
        Returns:
            self, reference to the SleepStudyDataset object
        """
        from concurrent.futures import ThreadPoolExecutor
        if N is None:
            N = len(self)
            random_order = False
        not_loaded = self.non_loaded_pairs
        if random_order:
            to_load = np.random.choice(not_loaded, size=N, replace=False)
        else:
            to_load = not_loaded[:N]
        self.log("Loading {}/{} SleepStudy objects...".format(
            len(to_load), len(self)))
        pool = ThreadPoolExecutor(max_workers=min(len(to_load), 7))
        res = pool.map(lambda x: x.load(), to_load)
        try:
            for i, ss in enumerate(res):
                print(" -- {}/{}".format(i + 1, len(to_load)),
                      end="\r",
                      flush=True)
        except CouldNotLoadError as e:
            raise CouldNotLoadError("Could not load sleep study {}."
                                    " Please refer to the above "
                                    "traceback.".format(e.study_id)) from e
        finally:
            pool.shutdown()
        return self
Example #48
    def crawl(self):
        """ Crawl a site recursively """

        if self.threads:
            executor = ThreadPoolExecutor(max_workers=self.concurrency)
        else:
            executor = ProcessPoolExecutor(max_workers=self.concurrency)

        with executor:
            urls = [self.url]
            url_dict = {}
            url_dict[self.url] = 1

            while len(urls):
                results = executor.map(self.fetch_url, urls, timeout=120)
                urls = []

                for result in results:
                    url, status, data = result
                    if status in (200, 301, 302):
                        # Save and parse the text
                        print('Fetched', url)
                        self.count += 1
                        self.data_map[url] = data
                        child_urls = self.parse_child_links(url, data)

                        # Push this to queue
                        for url in child_urls:
                            if not url in url_dict:
                                # print('Pushing',url)
                                url_dict[url] = 1
                                urls.append(url)

            print('\n\nCrawl complete.')
            print('Fetched', self.count, 'URLs.')
            # Dump data
            filename = 'data_%s.pkl' % self.domain
            pickle.dump(self.data_map, open(filename, 'wb'))
            print('Data saved to', filename)
Example #49
def deploy(subserver_list, deploy_type, is_restart_server,
           user_name, app_name, deploy_version,
           deploy_no, operation_type, env):

    worker_num = len(subserver_list[0])
    executor = ThreadPoolExecutor(max_workers=worker_num)

    # subserver_list format: [[1,2,3],[4,5,6],[7,8]]
    for item in subserver_list:
        if deploy_type not in ['rollback', 'stop', 'start', 'restart']:
            # If the deploy type is not rollback or start/stop, use the regular deployment command sequence
            cmd_list = ['backup', 'stop', deploy_type, 'start', 'status'] if is_restart_server else ['backup', deploy_type]
        elif deploy_type == 'rollback':
            cmd_list = ['stop', 'rollback', 'start', 'status'] if is_restart_server else ['rollback']
        elif deploy_type == 'stop':
            cmd_list = ['stop']
        elif deploy_type == 'start':
            cmd_list = ['start', 'status']
        elif deploy_type == 'restart':
            cmd_list = ['stop', 'start', 'status']
        else:
            return False
        cmd_len = len(cmd_list)
        for index, cmd in enumerate(cmd_list):
            # Compute the completion percentage for each command based on the total number of commands
            percent_value = "%.0f%%" % ((index+1)/cmd_len*100)
            # Multithreaded version; the work is IO-bound, so threading fits well

            server = []
            for sub in item:
                server.append(sub)
            server_len = len(server)
            for data in executor.map(cmd_run, server, [deploy_type] * server_len, [cmd] * server_len,
                                     [user_name] * server_len, [percent_value] * server_len,
                                     [deploy_version] * server_len, [operation_type] * server_len):
                if not data:
                    return False

    return True
Example #50
def scrape(query):
    hero_query, *response_query = query.split('/')
    response_query = ''.join(response_query)
    filtered_heroes = list(
        filter(
            lambda hero: is_hero_searched(hero_query, hero['name']),
            heroes,
        ))

    if len(filtered_heroes) == 0:
        return []

    executor = ThreadPoolExecutor(len(filtered_heroes))
    results = executor.map(
        lambda hero: collect_sounds(hero, response_query),
        filtered_heroes,
    )

    return [
        single_result for results_list in results
        for single_result in results_list
    ][0:50]
Example #51
def _transcode_single_pass(arguments: Namespace, input_file: InputFile, scenes: SceneList, executor: ThreadPoolExecutor):
    """Transcode a given input file using Single-Pass encoding."""
    all_command_lines = _create_single_pass_command_lines(arguments, input_file, scenes)
    to_run_command_lines = _limit_and_filter_commands(arguments, all_command_lines)
    logger.info(f'About to start {len(to_run_command_lines)} scene encodes for input file "{input_file.input_file}".')
    if arguments.limit_encodes:
        # Both 0 and None evaluate to False, thus the division is safe.
        logger.info(f"This will use {round(len(to_run_command_lines)/arguments.limit_encodes*100)}% "
                    f"of the remaining encoder contingent.")
    run_methods = (cli.run for cli in to_run_command_lines)
    runs = executor.map((lambda run: run()), run_methods)
    # Filter out unsuccessful runs
    finished_command_lines = (
        command
        for command, _ in zip(to_run_command_lines, runs)  # Ignore the None returned by run()
        if command.finished)
    # tuple() drives the map execution
    successful_encodes = len(tuple(finished_command_lines))
    if arguments.limit_encodes is not None:
        # Subtract the number of successful encodes from the encode count contingent.
        # This alters the global state so that the next input file, if any, has this many less encodes available.
        arguments.limit_encodes -= successful_encodes
Example #52
def multithread_master(config,
                       task_func,
                       task_queue,
                       result_list,
                       db_list=None,
                       max_thread=4):
    Util.print_yellow("Process start: [%5d]" % os.getpid())

    # Build the MySQL connection pool
    mysql_pool = {}
    if db_list is not None:
        for db_key in db_list:
            mysql_pool[db_key] = Util.mysql_pool(config, db_key)

    # Build the thread pool
    data_queue = queue.Queue(maxsize=max_thread)
    executor = ThreadPoolExecutor(max_workers=max_thread)
    data_iter = DataIterator(task_func, task_queue, data_queue, mysql_pool)

    # Dynamically dispatch tasks and collect the results
    for res in executor.map(multithread_slave, data_iter):
        result_list.append(res)
Example #53
 def training_epoch_end(self, outputs: List[Any]) -> None:
     if self.current_epoch < 300:
         return
     if not self.current_epoch % 7 == 0:
         return
     # import multiprocessing
     # pool = multiprocessing.Pool(os.cpu_count())
     # Initialise a thread pool for computing confidence scores
     pool = ThreadPoolExecutor(max_workers=os.cpu_count())
     # Pick a confidence threshold
     min_confidence = 0.01
     pesudo_labels = []  # audio_path, confidence, text
     self.eval()
     with torch.no_grad():
         for i, batch in enumerate(tqdm(self.trainer.datamodule.pseudo_train_dataloader())):
             percentage = batch[2]
             out = self.forward(batch[0].to(self.device), percentage)
             t_lengths = torch.mul(out.size(1), percentage).int()  # actual output lengths; audio lengths vary within the batch, so a mask is needed
             texts = self.wer.ctc_decoder_predictions_tensor(torch.argmax(out, dim=-1, keepdim=False), t_lengths)
             # Shard the data (map step)
             datas_seq = [(i, out[i].cpu().numpy(), t_lengths[i].cpu().numpy()) for i in range(len(out))]
             # Compute and merge the results (reduce step)
             results = list(pool.map(seq_sum_logprob_np, datas_seq))
             results = sorted(results, key=lambda x:x[0], reverse=False)
             avg_probs = [item[1] for item in results]
             # avg_probs = sum_logprob(out, t_lengths)  # confidence list; the single-threaded version is slower
             for audio_path, text, prob in zip(batch[-1], texts, avg_probs):
                 if prob <= min_confidence:
                     pesudo_labels.append((audio_path, text))
         logger.info("伪标签数据量{:d}条".format(len(pesudo_labels)))
         batch_size = self.trainer.datamodule.pseudo_train_dataloader().batch_size
         total_count = len(self.trainer.datamodule.pseudo_train_dataloader())*batch_size
         logger.info("总数据量{:d}".format(total_count))
         self.pesudo_percentage = len(pesudo_labels)/total_count
         # Inject the pseudo-labelled data into the training set
         self.trainer.datamodule.inject_pesudo_datasets(pesudo_labels)
     self.trainer.reset_train_dataloader(self)  # reload the training data
     self.train()
Example #54
def _search_all_async(type, id, info):
	logger.debug("Searching for sellers for %s/%s asynchronously"%(type, id))
	def search_module(module):
		cache_key = "vgmdb/%s/%s/sellers/%s"%(type,id,module.__name__)
		with Timer(tag=module.__name__, verbose=False):
			search = getattr(module, "search_%s"%(type,), None)
			empty = getattr(module, "empty_%s"%(type,), None)
			prev = cache.get(cache_key)
			ret = None
			if search and not prev:
				try:
					ret = search(info)
				except Exception as e:
					logger.error("Exception while searching %s for %s/%s: %s"%(module, type, id, e))
				if ret:
					cache.set(cache_key, ret)
					return ret
				else:
					return empty(info)
			return prev
	executor = ThreadPoolExecutor(max_workers=5)
	results = executor.map(search_module, search_modules, timeout=60)
	results = filter(lambda x:x, results)
	return results
Example #55
    def get_rect_tiles(self, x1, y1, x2, y2, parallel=False):
        """Return a PIL.Image of a rectangular map whose upper left and bottom right
        corner have tiles coordinates (x1, y1) and (x2, y2) respectively.

        If parallel=True: try to speed up the acquiring of tiles by running the
        needed calls to get_tile() asynchronously. Default False.
        """

        big = Image.new("RGB", (int((x2-x1) * self.xres), int((y2-y1) * self.yres)))

        #rows and columns of tiles containing (x1, y1) and (x2, y2)
        tiles_x1 = floor(x1)
        tiles_x2 = floor(x2)
        tiles_y1 = floor(y1)
        tiles_y2 = floor(y2)

        xdiff_pix = int(self.xres * (x1 - tiles_x1))
        ydiff_pix = int(self.yres * (y1 - tiles_y1))

        #acquire each tile needed and paste it into big
        if parallel:
            tiles_needed = [(x, y) for y in range(tiles_y1, tiles_y2+1) for x in range(tiles_x1, tiles_x2+1)]
            
            tpe = ThreadPoolExecutor(10)
            images = tpe.map(self.get_tile, *zip(*tiles_needed))
            for im, xy in zip(images, tiles_needed):
                x, y = xy
                big.paste(im, ((x-tiles_x1) * self.xres - xdiff_pix, (y-tiles_y1) * self.yres - ydiff_pix))

        else:
            for y in range(tiles_y1, tiles_y2+1):
                for x in range(tiles_x1, tiles_x2+1):
                    im = self.get_tile(x, y)
                    big.paste(im, ((x-tiles_x1) * self.xres - xdiff_pix, (y-tiles_y1) * self.yres - ydiff_pix))

        return big
Example #56
    class parallel_map(collections.Iterable):

        def __init__(self, pool_size, function, *iterables):
            if not isinstance(pool_size, numbers.Integral):
                raise TypeError('pool_size must be an integer, not ' +
                                repr(pool_size))
            elif not callable(function):
                raise TypeError('function must be callable, not ' +
                                repr(function))
            elif not iterables:
                raise TypeError('missing iterable')
            self.pool = ThreadPoolExecutor(pool_size)
            self.function = function
            self.results = self.pool.map(self.map_function, *iterables)

        def map_function(self, *args):
            try:
                value = self.function(*args)
            except Exception:
                return False, sys.exc_info()
            return True, value

        def __iter__(self):
            errors = []
            for success, value in self.results:
                if success:
                    yield value
                else:
                    errors.append(value)
            self.pool.shutdown()
            if PY3:
                for _, exc, tb in errors:
                    raise exc.with_traceback(tb)
            else:
                for _, exc, tb in errors:
                    exec('raise exc, None, tb')
Example #57
def get_wiki_summary(phrase):
    summ = []
    log.info ("pharse=%s\n", phrase)
    try:
        sentence = wikipedia.summary(phrase).split('. ')[0]
        log.debug("got: %s\n", sentence)
        summ.append(sentence)
    except wikipedia.exceptions.PageError:
        log.debug("exepted!!\n")  
        if len(phrase.split()) < 2:
            log.debug("WAT=\n")    
            return []
        
        words = [w for w in phrase.split() if len(w)>3]
        log.debug("words=%s\n", words)
        if len(words) < 1:
            return []
        else:
            log.debug('popcorn')
            ex = ThreadPoolExecutor(max_workers=MAX_WORKERS)
                #summ += list(ex.map(get_wiki_summary, phrase.split()))
            log.debug("summ before is: %s\n", summ)
            
            summ += list(itertools.chain(*ex.map(get_wiki_summary, words)))

            log.debug("summ is now: %s\n", summ)
            return summ    
            
    except wikipedia.exceptions.DisambiguationError as e:
        log.debug("excepted\n")  
        suggestions = format(str(e)).split("\n")[1:-1]
        log.debug('suggestions=%s', suggestions)
        #summ = get_wiki_summary(suggestions[random.randint(0, len(suggestions))])
        summ += get_wiki_summary(suggestions[0])
    finally:
        return summ
Example #58
    def _get_all_elevation_info(self):
        lat_lngs = []

        if self.location_info is not None:
            # multithread the calls to MapQuestElevationAPI to improve performance
            executor = ThreadPoolExecutor(max_workers = multiprocessing.cpu_count())
            result = executor.map(self._get_elevation_info, self.location_info['route']['locations'])

            for element in result:
                lat_lngs.append(element)

        return lat_lngs
Example #59
    class thread_pool( object ):
        """
        A context manager that yields a thread pool of the given size. On normal closing,
        this context manager closes the pool and joins all threads in it. On exceptions, the pool
        will be terminated but threads won't be joined.
        """

        def __init__( self, size ):
            self.executor = ThreadPoolExecutor( size )

        def __enter__( self ):
            return self

        # noinspection PyUnusedLocal
        def __exit__( self, exc_type, exc_val, exc_tb ):
            self.executor.shutdown( wait=exc_type is None )

        def apply_async( self, fn, args, callback=None ):
            future = self.executor.submit( fn, *args )
            if callback is not None:
                future.add_done_callback( lambda f: callback( f.result( ) ) )

        def map( self, fn, iterable ):
            return list( self.executor.map( fn, iterable ) )
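
A short usage sketch under the semantics described in the docstring; the worker function and inputs are illustrative:

def resolve(name):
    # placeholder IO-bound task (illustrative)
    return name.lower()

with thread_pool(4) as pool:
    results = pool.map(resolve, ['Alpha', 'Beta', 'Gamma'])   # blocks until all results are in
    pool.apply_async(resolve, ('Delta',), callback=print)     # prints 'delta' when finished
print(results)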