Python ThreadPool.close Exemples, multiprocessing.pool.ThreadPool.close Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : Search-scan.py Projet : SESARLab/mooncloud_probe_vuln

    def worker(self, db, lista):
        '''
        Search for plugins online using a pool of workers.

        Every element of ``lista`` is handed to ``onlinePluginSearch``. Each
        result is indexed as ``result[0]`` (the cache key) and ``result[1]``
        (the list of plugins found for it). Every plugin found is stored
        through ``db.updateCache`` and added to the returned policy string.

        :param db: object exposing ``updateCache(key, plugin)``.
        :param lista: iterable of items to search for.
        :returns: comma-separated plugin ids, always ending with the three
            default plugins ``19506,10287,12634``.
        '''
        # Size of the pool. WARNING (original author): a fibre connection
        # copes with up to 20 workers without errors, ADSL with 4 at most.
        processes = 5
        pool = Pool(processes)
        try:
            # Run the searches in the pool; results come back in input order.
            pluglist = pool.map(onlinePluginSearch, lista)
        finally:
            # Release the workers even when one of the searches raised.
            pool.close()
            pool.join()

        # Flatten the (key, plugins) results, refreshing the cache as we go.
        found = []
        for item in pluglist:
            if item[1] != []:
                for plug in item[1]:
                    db.updateCache(item[0], plug)
                    found.append(str(plug))

        # Report the real number of plugins; counting commas and adding one
        # over-reported by one because every plugin carried a trailing comma.
        print("Number of available pflugins: %s" % len(found))
        print("Adding to policy plugins: 19506,10287,12634 for credential checks and ping target.")

        # These three plugins are always appended to check that the target
        # is alive.
        return ','.join(found + ["19506,10287,12634"])

Exemple #2

0

Afficher le fichier

Fichier : tests.py Projet : Arroon/openage

def demo(args):
    """
    Show the Python logging facility in action.

    Parses the ``--verbose`` / ``--quiet`` counters from ``args``, switches to
    the resulting log level, emits one sample message through each logging
    helper, restores the previous level and finally logs eight messages from
    a thread pool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", "-v", action='count', default=ENV_VERBOSITY)
    parser.add_argument("--quiet", "-q", action='count', default=0)
    args = parser.parse_args(args)

    new_level = verbosity_to_level(args.verbose - args.quiet)
    info("new log level: " + str(new_level))
    previous_level = set_loglevel(new_level)
    info("old level was: " + str(previous_level))

    info("printing some messages with different log levels")

    # One sample message per logging helper, emitted in this exact order.
    samples = (
        (spam, "rofl"),
        (dbg, "wtf?"),
        (info, "foo"),
        (warn, "WARNING!!!!"),
        (err, "that didn't go so well"),
        (crit, "pretty critical, huh?"),
    )
    for emit, text in samples:
        emit(text)

    info("restoring old loglevel")
    set_loglevel(previous_level)
    info("old loglevel restored")

    info("running some threaded stuff")
    workers = ThreadPool()
    for number in range(8):
        workers.apply_async(info, ("async message #" + str(number),))
    # No more work will be submitted; wait for the queued messages.
    workers.close()
    workers.join()

Exemple #3

0

Afficher le fichier

Fichier : base.py Projet : GaretJax/pm

def update(args=None):
    """
    Update every project that is behind, optionally through a worker pool.

    ``args.dir`` is forwarded to ``list_projects``. A truthy ``args.j`` is
    used as the pool size; otherwise the projects are processed one after
    another in the calling thread.
    """
    def refresh(project):
        # Only projects that report being behind are touched.
        if not project.is_behind():
            return
        project.update()
        print("{} updated".format(project.name))

    projects = list_projects(False, args.dir)

    print("Update in progress...")

    if not args.j:
        for project in projects:
            refresh(project)
    else:
        pool = Pool(args.j)
        for project in projects:
            pool.apply_async(refresh, (project,))
        # Stop accepting work and wait for the queued updates to finish.
        pool.close()
        pool.join()

    print("Update done")

Exemple #4

0

Afficher le fichier

Fichier : tattled.py Projet : nickmaccarthy/Tattle

def main():
    """
    Run every tale through ``worker`` on a thread pool.

    The pool size is read from the ``pool_size`` entry of ``tcfg['Workers']``
    and falls back to 10 when that entry is missing.
    """
    # Run the Tales. The configured pool used to be overwritten right away by
    # a default-sized ThreadPool(), which leaked the first pool and ignored
    # the configured size.
    pool = ThreadPool(processes=int(tcfg['Workers'].get('pool_size', 10)))
    try:
        pool.map(worker, tales)
    finally:
        # Release the worker threads even if a tale raised.
        pool.close()
        pool.join()

Exemple #5

0

Afficher le fichier

Fichier : RunnerUtils.py Projet : lpp1985/lpp_Script

def local_job_runner(cmds_list, num_threads, throw_error=True):
    """
    Execute a list of cmds locally using thread pool with at most
    num_threads threads, wait for all jobs to finish before exit.

    If throw_error is True, when any job failed, raise RuntimeError.
    If throw_error is False, return a list of cmds that failed.

    Parameters:
      cmds_list - cmds that will be executed in ThreadPool
      num_threads - number of threads that will be used in the ThreadPool
      throw_error - whether or not to throw RuntimeError when any of cmd failed.

    Returns:
      The list of failed cmds (empty when every cmd succeeded).
    """
    def run_cmd_in_shell(cmd):
        # A CalledProcessError is recorded as a failure of this one command.
        # Swallowing it around the whole map left ``rets`` undefined and made
        # the code below crash with a NameError.
        try:
            return backticks(cmd, merge_stderr=True)
        except subprocess.CalledProcessError as err:
            output = err.output if err.output is not None else str(err)
            return (output, err.returncode or 1)

    pool = ThreadPool(processes=num_threads)
    try:
        rets = pool.map(run_cmd_in_shell, cmds_list)
    finally:
        # Always release the worker threads.
        pool.close()
        pool.join()

    # Each result is indexed as (output, return code, ...); a non-zero code
    # marks the command as failed.
    failures = [(cmd, ret[0]) for cmd, ret in zip(cmds_list, rets) if ret[1] != 0]
    failed_cmds = [cmd for cmd, _ in failures]

    if throw_error and failed_cmds:
        errmsg = "\n".join(["CMD failed: %s, %s" % (cmd, out)
                            for (cmd, out) in failures])
        raise RuntimeError(errmsg)
    return failed_cmds

Exemple #6

0

Afficher le fichier

Fichier : static_sync.py Projet : ericflo/django-simplestatic

    def handle_noargs(self, **options):
        """
        Sync the static directory, then process every template directory.

        Both phases walk a directory tree with ``self.walk_tree`` and hand
        each ``(base, filename)`` pair to a 20-thread pool; the phase ends
        once that pool has been closed and joined.
        """
        def run_pooled(roots, task):
            # Queue ``task(base, filename)`` for everything walk_tree visits
            # and wait until all queued calls have finished.
            workers = ThreadPool(20)

            def submit(base, filename):
                workers.apply_async(task, args=[base, filename])

            self.walk_tree(roots, submit)
            workers.close()
            workers.join()

        mimetypes.init()

        # Phase 1: sync every file in the static media dir (to S3, per the
        # original author's note).
        locked_print("===> Syncing static directory")
        run_pooled([conf.SIMPLESTATIC_DIR], self.sync_file)
        locked_print("===> Static directory syncing complete")

        # Phase 2: iterate over every template, looking for SimpleStaticNode.
        locked_print("===> Compressing and uploading CSS and JS")
        run_pooled(list(settings.TEMPLATE_DIRS), self.handle_template)
        locked_print("===> Finished compressing and uploading CSS and JS")

Exemple #7

0

Afficher le fichier

Fichier : acm_proc.py Projet : dragonxlwang/s3e

def main(dir_path, outfile_path, is_journal=True):
    """
    Map the files of ``dir_path`` over 20 threads, then reduce the results.

    The directory listing is cut into 20 contiguous index ranges. Each range
    is passed to ``job_map`` together with a shared ``ret`` dict, and once all
    map jobs are done ``job_reduce(ret, outfile_path)`` is called.

    :param dir_path: directory whose entries are processed.
    :param outfile_path: forwarded to ``job_reduce``.
    :param is_journal: forwarded unchanged to every ``job_map`` call.
    """
    pn = 20
    flst = os.listdir(dir_path)
    total = len(flst)
    arglst = []
    ret = dict()
    for i in range(pn):
        beg = int(math.ceil(float(total) / pn * i))
        end = int(math.ceil(float(total) / pn * (i + 1)))
        # Pin the outer bounds so floating-point rounding cannot move them.
        # These guards used to compare the builtin ``id`` instead of the loop
        # variable and therefore never fired.
        if i == 0:
            beg = 0
        if i == pn - 1:
            end = total
        arglst.append([dir_path, is_journal, beg, end, i, ret])
    pool = ThreadPool(pn)
    try:
        pool.map(job_map, arglst)
    finally:
        # Release the worker threads even when a map job raised.
        pool.close()
        pool.join()
    print(80 * '=')
    print('[acmdl]: map finished')
    print(80 * '=')
    job_reduce(ret, outfile_path)
    print(80 * '=')
    print('[acmdl]: reduce finished')
    print(80 * '=')
    return

Exemple #8

0

Afficher le fichier

Fichier : teardown_script.py Projet : AharonShachar/vCenterShell

    def _power_off_and_delete_all_vm_resources(self, api, reservation_details):
        """
        Process every VM-backed resource of the reservation in parallel.

        For each resource whose details carry ``VmDetails``,
        ``_power_off_or_delete_deployed_app`` is queued on a thread pool. All
        non-``None`` results are then removed with one bulk
        ``api.DeleteResources`` call.
        """
        resources = reservation_details.ReservationDescription.Resources

        pool = ThreadPool()
        pending = []
        lock = Lock()
        # Shared between all workers, together with the lock above.
        message_status = {"power_off": False, "delete": False}

        for resource in resources:
            details = api.GetResourceDetails(resource.Name)
            if not details.VmDetails:
                continue
            pending.append(pool.apply_async(
                self._power_off_or_delete_deployed_app,
                (api, details, lock, message_status)))

        pool.close()
        pool.join()

        # Collect what the workers returned; ``None`` entries are skipped.
        outcomes = [task.get() for task in pending]
        resource_to_delete = [outcome for outcome in outcomes if outcome is not None]

        # delete resource - bulk
        if resource_to_delete:
            api.DeleteResources(resource_to_delete)

Exemple #9

0

Afficher le fichier

Fichier : sftp_connector.py Projet : guptarajat/data-connectors

	def read(self, sftppath, localPath = None, numParallelConnections = 1):
		"""
		Download ``sftppath`` (a file or a directory tree) below ``localPath``.

		``localPath`` defaults to the current working directory. With
		``numParallelConnections`` greater than 1, every file is fetched by a
		thread pool of that size, each fetch opening its own SFTP connection;
		otherwise files are fetched one by one over a single SFTP client.
		"""
		if localPath is None:
			localPath = os.getcwd() # local path - can be changed later
		sftp = paramiko.SFTPClient.from_transport(self.transport)
		parallel = numParallelConnections > 1
		if parallel:
			pool = ThreadPool(numParallelConnections)

		def fetchOverOwnConnection(remoteFile, localFile):
			# Each pooled download uses a dedicated connection that is
			# closed again as soon as the file has been transferred.
			connection = SFTPConnection(self.connectionInfo)
			connection.connect()
			client = paramiko.SFTPClient.from_transport(connection.transport)
			client.get(remoteFile, localFile)
			client.close()
			connection.close()

		def walk(client, remotePath, localDir):
			attributes = client.lstat(remotePath)
			# Files and directories both land under their own base name.
			target = os.path.join(localDir, os.path.basename(remotePath))
			if stat.S_ISDIR(attributes.st_mode):
				try:
					# An already existing directory is fine (covers races).
					os.makedirs(target)
				except OSError as exception:
					if exception.errno != errno.EEXIST:
						raise
				for entry in client.listdir_attr(remotePath):
					walk(client, os.path.join(remotePath, entry.filename), target)
			elif parallel:
				pool.apply_async(fetchOverOwnConnection, args= (remotePath, target))
			else:
				client.get(remotePath, target)

		walk(sftp, sftppath, localPath)
		sftp.close()
		if parallel:
			# Wait for the queued downloads before returning.
			pool.close()
			pool.join()

Exemple #10

0

Afficher le fichier

Fichier : benchmark_lz4.py Projet : manahl/arctic

def bench_compression_comparison(n_chunks, df_length, append_mul, pool_size, pool_step, repeats,
                                 use_raw_lz4, use_HC):
    """
    Benchmark single-threaded against pooled compression and print results.

    The same test string is repeated ``n_chunks`` times. After a
    single-threaded baseline run, pool sizes 2, 2 + ``pool_step``, ... up to
    ``pool_size`` are measured and compared against the baseline mean. With
    ``use_raw_lz4`` a ``ThreadPool`` is passed to ``bench_multi``; otherwise
    the size is set through ``c.set_compression_pool_size``.
    """
    payload = construct_test_data(df_length, append_mul)
    # Length of the test string divided by 1024 ** 2.
    chunk_size = len(payload) / 1024 ** 2.0
    chunks = [payload] * n_chunks
    total_size = chunk_size * n_chunks

    # Single-threaded baseline
    baseline = bench_single(repeats, chunks, use_HC)
    print_results(1, chunk_size, n_chunks, total_size, baseline)
    single_mean = np.mean(baseline)

    # Multi-threaded runs, one per pool size
    for sz in range(2, pool_size + 1, pool_step):
        pool = ThreadPool(sz) if use_raw_lz4 else None
        if pool is None:
            c.set_compression_pool_size(sz)
        timings = bench_multi(repeats, chunks, use_HC, pool=pool)
        print_results(sz, chunk_size, n_chunks, total_size, timings, compare=single_mean)
        if pool is not None:
            pool.close()
            pool.join()
    print("")

Exemple #11

0

Afficher le fichier

Fichier : KuaidailiProxyGenerator.py Projet : myme5261314/http_proxy_service

    def extract(url):
        """
        Scrape one proxy-list page and run ``wrapper`` on every matching row.

        A table row is kept when its third cell equals the high-anonymity
        marker, its fourth cell lists ``HTTP`` and its fifth cell lists
        ``GET``. For each kept row a list of four values is built: cells 0
        and 1 stripped, cell 6 stripped without its last character, and the
        number parsed from cell 7 scaled to seconds.

        :param url: page to download through ``rs.get``.
        :returns: ``None`` when the page has no ``tbody``, ``[]`` when no row
            matches, otherwise the list returned by ``Pool.map(wrapper, ...)``.
        """
        try:
            r = rs.get(url)
            soup = bs(r.text, 'html.parser')
            tr_list = soup.tbody.find_all('tr')
        except AttributeError:
            # print(...) with one argument behaves the same on Python 2 and 3.
            print(r)
            return None
        info_list = []
        for tr in tr_list:
            td_list = tr.find_all('td')
            if td_list[2].text.strip() != u'高匿名':
                continue
            if 'HTTP' not in td_list[3].text.strip(' ').split(','):
                continue
            if 'GET' not in td_list[4].text.strip(' ').split(','):
                continue
            # Rows carrying the hour marker are scaled by 3600, all others
            # by 60.
            scale = 3600 if td_list[7].text.find(u'小时') != -1 else 60
            info_list.append([
                td_list[0].text.strip(),
                td_list[1].text.strip(),
                td_list[6].text.strip()[:-1],
                float(td_list[7].text[:-3]) * scale,
            ])
        # Pool(0) raises ValueError, so a page without matching rows must not
        # reach the pool at all.
        if not info_list:
            return []
        p = Pool(len(info_list))
        try:
            proxy_list = p.map(wrapper, info_list)
        finally:
            # close() alone never reaps the workers; join() does.
            p.close()
            p.join()
        return proxy_list

Exemple #12

0

Afficher le fichier

Fichier : backend.py Projet : Chateaudur/mopidy-youtube

def resolve_playlist(url):
    """
    Resolve every video of a YouTube playlist.

    The playlist items are fetched page by page from the ``playlistItems``
    endpoint (50 per request) until no ``nextPageToken`` is returned. The
    collected video ids are then passed to ``resolve_url`` on a pool of 16
    threads.

    :param url: value sent as ``playlistId``.
    :returns: the truthy results of ``resolve_url``, in playlist order.
    """
    logger.info("Resolving YouTube-Playlist '%s'", url)
    video_ids = []

    # 'first' is a sentinel for the initial request, which carries no token.
    page = 'first'
    while page:
        params = {
            'playlistId': url,
            'maxResults': 50,
            'key': yt_key,
            'part': 'contentDetails'
        }
        if page != "first":
            logger.debug("Get YouTube-Playlist '%s' page %s", url, page)
            params['pageToken'] = page

        result = session.get(yt_api_endpoint+'playlistItems', params=params)
        data = result.json()
        page = data.get('nextPageToken')

        for item in data["items"]:
            video_ids.append(item['contentDetails']['videoId'])

    if not video_ids:
        return []

    # The pool is created only once the ids are known, so a failing request
    # above can no longer leak 16 idle threads.
    resolve_pool = ThreadPool(processes=16)
    try:
        resolved = resolve_pool.map(resolve_url, video_ids)
    finally:
        resolve_pool.close()
        resolve_pool.join()
    return [item for item in resolved if item]

Exemple #13

0

Afficher le fichier

Fichier : testrunner.py Projet : gevent/gevent

    def _run_tests(self):
        """
        Runs the tests, produces no report.

        Tests whose command matches ``self._configured_run_alone_tests`` are
        set aside and run one by one after the pool has drained; all other
        tests are handed to ``self._spawn`` together with a thread pool of
        ``self._worker_count`` workers.
        """
        run_alone = []

        tests = self._tests
        pool = ThreadPool(self._worker_count)
        try:
            for cmd, options in tests:
                # A falsy options value is normalised to an empty dict.
                options = options or {}
                if matches(self._configured_run_alone_tests, cmd):
                    run_alone.append((cmd, options))
                else:
                    self._spawn(pool, cmd, options)
            # No further submissions; wait for the pooled tests to finish
            # before any standalone test starts.
            pool.close()
            pool.join()

            if run_alone:
                util.log("Running tests marked standalone")
                for cmd, options in run_alone:
                    self._run_one(cmd, **options)
        except KeyboardInterrupt:
            # First interrupt: log and call self._reap_all(). The exception
            # is not re-raised on this path.
            try:
                util.log('Waiting for currently running to finish...')
                self._reap_all()
            except KeyboardInterrupt:
                # Second interrupt while reaping: stop the pool and propagate.
                pool.terminate()
                raise
        except:
            # Anything else: stop the pool, then propagate unchanged.
            pool.terminate()
            raise

Exemple #14

0

Afficher le fichier

Fichier : stress.py Projet : HausenSjtu/Hackathon

def thread(host, port, threads, num):
    """
    Queue ``num`` calls of ``job(host, port)`` on a pool of ``threads`` workers.

    Submissions are spaced one millisecond apart; the function returns once
    every queued call has finished.
    """
    workers = ThreadPool(threads)
    args = (host, port)
    for _submission in range(num):
        workers.apply_async(job, args)
        # Brief pause between two submissions.
        time.sleep(0.001)
    workers.close()
    workers.join()

Exemple #15

0

Afficher le fichier

Fichier : __init__.py Projet : alaattinturyan/commons

  def check_artifact_cache(self, vts):
    """Splits the specified VersionedTargetSets by artifact cache availability.

    Returns a pair ``(cached_vts, uncached_vts)``: the first list holds the
    ones that were satisfied from the cache (these don't require building),
    the second the remaining ones in their original order. When no artifact
    cache is configured or reading from it is disabled, everything is
    reported as uncached.
    """
    if not vts:
      return [], []

    cached_vts = []
    uncached_vts = OrderedSet(vts)
    cache_readable = self._artifact_cache and self.context.options.read_from_artifact_cache
    if not cache_readable:
      return cached_vts, list(uncached_vts)

    def fetch(vt):
      return self._artifact_cache.use_cached_files(vt.cache_key)

    # Look all targets up concurrently, one lookup per task.
    pool = ThreadPool(processes=6)
    hits = pool.map(fetch, vts, chunksize=1)
    pool.close()
    pool.join()

    for vt, hit in zip(vts, hits):
      if not hit:
        self.context.log.info('No cached artifacts for %s' % vt.targets)
        continue
      cached_vts.append(vt)
      uncached_vts.discard(vt)
      self.context.log.info('Using cached artifacts for %s' % vt.targets)
      vt.update()
    return cached_vts, list(uncached_vts)

Exemple #16

0

Afficher le fichier

Fichier : threading_runner.py Projet : artshmelev/noseapp

    def run(self, suites):
        """
        Run every suite on a thread pool and return the shared result.

        The plugins may replace the suites (``prepareTest``) and the output
        stream (``setOutputStream``). The pool size comes from the
        ``thread_pool`` option; a negative value selects ``cpu_count()``.
        ``finalize`` is called on the plugins once all suites have finished.
        """
        plugins = self.config.plugins

        prepared = plugins.prepareTest(suites)
        if prepared is not None:
            suites = prepared

        stream = plugins.setOutputStream(self.stream)
        if stream is not None:
            self.stream = stream

        result = self._makeResult()

        size = self.config.options.thread_pool
        pool = ThreadPool(cpu_count() if size < 0 else size)

        with measure_time(result):
            for suite in suites:
                pool.apply_async(suite, args=(result,))

            # Wait for all suites while the time is still being measured.
            pool.close()
            pool.join()

        self.config.plugins.finalize(result)
        return result

Exemple #17

0

Afficher le fichier

Fichier : xmlpdf.py Projet : robdunne-uom/et-pubmed-xml-pdf

	def downloadPDFs(self):
		"""
		Download all the files extracted from the metadata.

		Reads the CSV at ``self.csvpath`` and queues ``self.saveFile`` for the
		PMC id found in column 8 of every row. A row whose id does not
		contain ``PMC`` stops the run through ``sys.exit(0)``; downloads that
		were already queued are still awaited first.
		"""
		startTime = time.strftime("%c")
		# One pool for the whole run. Creating, closing and joining a pool
		# per row made every download wait for the previous one.
		pool = Pool(30)
		try:
			# The with-block closes the CSV file on the sys.exit path too.
			with open(self.csvpath) as f:
				metadata = csv.reader(f, quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True)

				for row in metadata:
					pmcid = row[8]

					### Check the input is a PMC ID
					if 'PMC' in pmcid:
						print('Starting thread for: '+pmcid)
						pool.apply_async(self.saveFile, (pmcid,))
					else:
						print('Something is wrong. '+pmcid+' is not a PMC id')
						sys.exit(0)
		finally:
			# Wait for everything that was queued, even when leaving early.
			pool.close()
			pool.join()

		print('Finished downloading all files: start {} end {}.'.format(startTime, time.strftime("%c")))

Exemple #18

0

Afficher le fichier

Fichier : streamlist.py Projet : drunkrx/livestreamer-curses

    def check_online_streams(self):
        """
        Check every stream's online state on a pool and refresh the display.

        While the checks are running, the status line shows how many streams
        have been checked so far. Afterwards each stream's ``'online'`` entry
        is updated, ``all_streams_offline`` is cleared if any stream is
        online, the stream filter is re-applied and the autocheck timestamp
        is set.
        """
        self.all_streams_offline = True
        self.set_status(' Checking online streams...')

        # Workers put each finished url here; its size drives the progress.
        done_queue = queue.Queue()

        def check_stream_managed(args):
            url, _passed_queue = args
            online = self._check_stream(url)
            done_queue.put(url)
            return online

        pool = Pool(self.config.CHECK_ONLINE_THREADS)
        jobs = [(stream['url'], done_queue) for stream in self.streams]
        pending = pool.map_async(check_stream_managed, jobs)
        n_streams = len(self.streams)

        # Poll instead of blocking so that the status line keeps updating.
        while not pending.ready():
            sleep(0.1)
            self.set_status(' Checked {0}/{1} streams...'.format(done_queue.qsize(), n_streams))
            self.s.refresh()

        outcomes = pending.get()
        for index, stream in enumerate(self.streams):
            stream['online'] = outcomes[index]
            if stream['online']:
                self.all_streams_offline = False

        self.refilter_streams()
        self.last_autocheck = int(time())

        pool.close()

Exemple #19

0

Afficher le fichier

Fichier : ons_resolver.py Projet : ibrahimahmed443/opendig

def ons_resolver(key):
    """
    Fetch the profile ``'u/' + key`` from every ONS server and cross-check it.

    All servers in ``ONS_SERVERS`` are queried in parallel. The replies are
    fingerprinted, and the most common reply is returned when at least
    ``SERVER_CONFIRMATION_PERCENTAGE`` percent of the servers agree on it;
    otherwise an error reply is returned.
    """

    def check_server(server):
        # A server that cannot be queried contributes an error reply.
        try:
            namecoind = NamecoindServer(server, NAMECOIND_PORT, NAMECOIND_USER, NAMECOIND_PASSWD)
            return namecoind.get_full_profile('u/' + key)
        except Exception:
            return error_reply("Couldn't connect to namecoind")

    pool = ThreadPool(len(ONS_SERVERS))
    try:
        replies = pool.map(check_server, ONS_SERVERS)
    finally:
        pool.close()
        pool.join()

    # sort_keys makes equal replies hash equally regardless of key order;
    # the explicit encode keeps hashlib working on Python 3 as well.
    data_hashes = [
        hashlib.md5(json.dumps(reply, sort_keys=True).encode('utf-8')).hexdigest()
        for reply in replies
    ]

    top_hash, max_repeated_times = Counter(data_hashes).most_common(1)[0]

    if max_repeated_times >= (SERVER_CONFIRMATION_PERCENTAGE/100.0) * len(ONS_SERVERS):
        # Return the reply the majority agreed on. The first server's reply
        # may be the odd one out.
        return replies[data_hashes.index(top_hash)]
    else:
        return error_reply("Data from different ONS servers doens't match")

Exemple #20

0

Afficher le fichier

Fichier : example.py Projet : mhjohnson/memory-profiling-requests

def main():
    """Call ``google_search`` for 1000 numbered query URLs on ten threads."""
    base_url = 'https://www.google.com/?gws_rd=ssl#q='
    urls = []
    for number in xrange(1000):
        urls.append(base_url + str(number))

    workers = ThreadPool(10)
    workers.map(google_search, urls)
    workers.close()
    workers.join()

Exemple #21

0

Afficher le fichier

Fichier : server.py Projet : Rensselaer-AI-League/GeneralizedGameServer

	def poll_all(self, recipient_infos):
		"""
		Poll every recipient concurrently and collect the responses.

		:param recipient_infos: sequence of ``(player, type, body)`` entries.
		:returns: dict mapping each player to the value returned by
			``self.poll``, or to ``None`` when fetching that result raised
			(including a timeout after ``self.timeout``). Empty input yields
			an empty dict.
		"""
		results = dict()

		# ThreadPool rejects a size of 0, so there is nothing to set up when
		# nobody has to be polled.
		if not recipient_infos:
			return results

		pending = dict()

		# One worker per recipient, so that all polls run at the same time.
		pool = ThreadPool(processes=len(recipient_infos))
		try:
			for info in recipient_infos:
				receiver, rq_type, body = info[0], info[1], info[2]
				pending[receiver] = pool.apply_async(self.poll, (receiver, rq_type, body,))

			# Every poll has already been started above, so collecting the
			# results one after another does not serialise the polls.
			for info in recipient_infos:
				receiver = info[0]
				try:
					results[receiver] = pending[receiver].get(timeout=self.timeout)
				except Exception as e:
					self.log_error(e)
					results[receiver] = None # Worry about this later
		finally:
			# Clean up the threads, also when submitting or logging failed.
			pool.close()
			pool.join()

		return results

Exemple #22

0

Afficher le fichier

Fichier : parallel.py Projet : earthreader/libearth

        class parallel_map(collections.Iterable):
            """
            Iterable that applies ``function`` to zipped ``iterables`` on a
            thread pool and yields the results in completion order.

            Exceptions raised by ``function`` are collected while iterating
            and re-raised only after all successful values were yielded.
            """

            def __init__(self, pool_size, function, *iterables):
                """
                Validate the arguments and start the pooled computation.

                Raises ``TypeError`` when ``pool_size`` is not an integer,
                ``function`` is not callable, or no iterable is given.
                """
                if not isinstance(pool_size, numbers.Integral):
                    raise TypeError('pool_size must be an integer, not ' +
                                    repr(pool_size))
                elif not callable(function):
                    raise TypeError('function must be callable, not ' +
                                    repr(function))
                elif not iterables:
                    raise TypeError('missing iterable')
                self.pool = ThreadPool(pool_size)
                self.function = function
                # imap_unordered hands results over as soon as they are
                # ready, not in input order.
                self.results = self.pool.imap_unordered(self.map_function,
                                                        zip(*iterables))

            def map_function(self, args):
                """
                Call ``function(*args)`` and return ``(True, value)``, or
                ``(False, sys.exc_info())`` when it raised an ``Exception``.
                """
                try:
                    value = self.function(*args)
                except Exception:
                    return False, sys.exc_info()
                return True, value

            def __iter__(self):
                """
                Yield successful values, then shut the pool down and re-raise
                the collected errors.
                """
                errors = []
                for success, value in self.results:
                    if success:
                        yield value
                    else:
                        errors.append(value)
                self.pool.close()
                self.pool.join()
                for error in errors:
                    # Python 2 three-argument raise, kept inside exec() as a
                    # string so it is only parsed when this line executes.
                    # The first raise leaves the generator, so later errors
                    # are never reached.
                    exec('raise error[1], None, error[2]')

Exemple #23

0

Afficher le fichier

Fichier : mass_downloader.py Projet : calum-chamberlain/obspy

    def _initialize_clients(self):
        """
        Initialize all clients.

        Every entry of ``self.providers`` is either a client name or an
        already initialized ``Client``. All entries are resolved in parallel
        on a thread pool; usable clients are stored in
        ``self._initialized_clients`` in provider order. A client counts as
        usable only if it offers both the 'dataselect' and the 'station'
        service.
        """
        logger.info("Initializing FDSN client(s) for %s." % ", ".join(
            _i.base_url if hasattr(_i, "base_url") else _i
            for _i in self.providers))

        def _get_client(client_name):
            # Returns (name, client), or (name, None) when the client could
            # not be created or lacks a required service.
            # It might already be an initialized client - in that case just
            # use it.
            if isinstance(client_name, Client):
                name, client = client_name.base_url, client_name
            else:
                try:
                    this_client = Client(client_name, debug=self.debug)
                    name, client = client_name, this_client
                except utils.ERRORS as e:
                    if "timeout" in str(e).lower():
                        extra = " (timeout)"
                    else:
                        extra = ""
                    logger.warn("Failed to initialize client '%s'.%s"
                                % (client_name, extra))
                    return client_name, None

            # Service names starting with "available" are ignored here.
            services = sorted([_i for _i in client.services.keys()
                               if not _i.startswith("available")])
            if "dataselect" not in services or "station" not in services:
                logger.info("Cannot use client '%s' as it does not have "
                            "'dataselect' and/or 'station' services."
                            % name)
                return name, None
            return name, client

        # Catch warnings in the main thread. The catch_warnings() context
        # manager does not reliably work when used in multiple threads.
        # One worker per provider.
        p = ThreadPool(len(self.providers))
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            clients = p.map(_get_client, self.providers)
        p.close()
        for warning in w:
            logger.debug("Warning during initialization of one of the "
                         "clients: " + str(warning.message))

        # Keep only the providers that produced a usable client.
        clients = {key: value for key, value in clients if value is not None}

        # Write to initialized clients dictionary preserving order. Remember
        # that each passed provider might already be an initialized client
        # instance.
        for client in self.providers:
            # A provider given by name is matched against the keys, a
            # provider given as a Client instance against the values.
            if client not in clients and client not in clients.values():
                continue
            name = client.base_url if hasattr(client, "base_url") else client
            self._initialized_clients[name] = clients[name]

        logger.info("Successfully initialized %i client(s): %s."
                    % (len(self._initialized_clients),
                       ", ".join(self._initialized_clients.keys())))

Exemple #24

0

Afficher le fichier

Fichier : pabot.py Projet : tangkun75/pabot

def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    """
    Run every suite through ``execute_and_wait_with`` on a thread pool.

    Suites are paired with hosts by ``TestsuitesHosts``. The hosts are read
    from the file named by ``pabot_args["hostsfile"]`` when that key is
    present, otherwise ``None`` is passed. While the pool is working, SIGINT
    is routed to ``keyboard_interrupt``; the previous handler is restored
    afterwards.
    """
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])
    # ``in`` replaces the Python 2-only dict.has_key().
    if "hostsfile" in pabot_args:
        # The with-block closes the hosts file, which used to stay open.
        with open(pabot_args["hostsfile"]) as hosts_file:
            hosts = [host.rstrip('\r\n') for host in hosts_file]
    else:
        hosts = None
    if pabot_args["verbose"]:
        # Function-call form of print, valid on both Python 2 and 3.
        print([(suite, host) for (suite, host) in TestsuitesHosts(suite_names, hosts)])
    result = pool.map_async(execute_and_wait_with,
               [(datasources,
                 outs_dir,
                 options,
                 suite,
                 pabot_args['command'],
                 pabot_args['verbose'],
                 host)
                for (suite, host) in TestsuitesHosts(suite_names, hosts)])
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)

Exemple #25

0

Afficher le fichier

Fichier : client.py Projet : fvigotti/kafka-tools

    def _send_some_brokers(self, requests, ignore_errors=True):
        """
        Sends requests to one or more brokers in parallel and gathers the responses, keyed by the
        broker each one came from. A thread pool is used to parallelize the sends.

        Args:
            requests (int -> BaseRequest): A dictionary, where keys are integer broker IDs and the values
                are valid request objects that inherit from BaseRequest.
            ignore_errors (bool): When True, a ConnectionError for a broker is recorded as a None
                response; when False, it is re-raised.

        Returns:
            dict (int -> BaseResponse): A map of broker IDs to response instances (inherited from
                BaseResponse). Failed requests are represented with a value of None
        """
        pool = ThreadPool(processes=self.configuration.broker_threads)
        pending = {
            broker_id: pool.apply_async(self._send_to_broker, (broker_id, requests[broker_id]))
            for broker_id in requests
        }
        pool.close()
        pool.join()

        responses = {}
        for broker_id, outcome in pending.items():
            try:
                responses[broker_id] = outcome.get()
            except ConnectionError:
                if not ignore_errors:
                    raise
                # Individual broker failures are OK, as we'll represent them with a None value
                responses[broker_id] = None
        return responses

Exemple #26

0

Afficher le fichier

Fichier : amcates.py Projet : amcat/amcat

    def get_used_properties(self, set_ids=None, article_ids=None, **filters):
        """
        Yields the names of the flexible properties (all properties minus ALL_FIELDS) for
        which ``_get_used_properties`` reports a hit within the given filters.

        ``set_ids`` and ``article_ids``, when given, are added to the filters as "sets" and "ids".
        """
        for filter_name, value in (("sets", set_ids), ("ids", article_ids)):
            if value is not None:
                filters[filter_name] = value

        flexible_properties = set(self.get_properties()) - set(ALL_FIELDS)

        # Query template; every property check receives its own deep copy.
        template = {"query": {"bool": {"must": [
            build_filter(**filters),
            {"exists": {"field": "fakeprop"}}
        ]}}}
        bodies = (copy.deepcopy(template) for _ in flexible_properties)

        pool = ThreadPool()
        outcomes = pool.imap(self._get_used_properties, zip(bodies, flexible_properties))

        try:
            # imap keeps input order, so outcomes line up with the properties.
            for found, prop in zip(outcomes, flexible_properties):
                if found:
                    yield prop
        finally:
            pool.close()
Exemple #27

0

Afficher le fichier

Fichier : pabot.py Projet : roamingunner/rf-libs

def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    """
    Run every suite through ``execute_and_wait_with`` on a thread pool.

    With a truthy ``pabot_args["vectors"]`` each suite is run once per vector;
    otherwise each suite is run once with ``None`` in the vector position.
    While the pool is working, SIGINT is routed to ``keyboard_interrupt``;
    the previous handler is restored afterwards.
    """
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])

    # Without vectors, a single ``None`` stands in for the missing vector.
    vectors = pabot_args['vectors'] if pabot_args.get("vectors") else [None]
    jobs = [(datasources,
             outs_dir,
             options,
             suite,
             pabot_args['command'],
             pabot_args['verbose'],
             vector)
            for suite in suite_names
            for vector in vectors]
    result = pool.map_async(execute_and_wait_with, jobs)
    pool.close()

    while True:
        if result.ready():
            break
        # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)

Exemple #28

0

Afficher le fichier

Fichier : artifact_list_builder.py Projet : wfkbuilder/maven-repository-builder

    def _listArtifacts(self, urls, gavs):
        """
        Resolves each GAV to the first repository that contains its artifact.

        The lookups run concurrently on a thread pool; a GAV that is found in
        none of the repositories is reported with a warning and left out.

        :param urls: repository URLs where the given GAVs can be located
        :param gavs: List of GAVs
        :returns: Dictionary where index is MavenArtifact object and value is it's repo root URL.
        """
        artifacts = {}

        def locate(gav):
            artifact = MavenArtifact.createFromGAV(gav)
            for url in urls:
                if maven_repo_util.gavExists(url, artifact):
                    # Written from several worker threads (the original
                    # author asks: critical section?).
                    artifacts[artifact] = ArtifactSpec(url)
                    break
            else:
                logging.warning('Artifact %s not found in any url!', artifact)

        pool = ThreadPool(maven_repo_util.MAX_THREADS)
        for gav in gavs:
            pool.apply_async(locate, [gav])

        # Close the pool and wait for the workers to finish
        pool.close()
        pool.join()

        return artifacts

Exemple #29

0

Afficher le fichier

Fichier : async_rl.py Projet : gandalfvn/hierarchical_rl

    def run(self):
        """
        Run ``run_experiement(self.experiment, idx)`` once per agent, each on
        its own pool thread, and wait until all of them have returned.
        """
        workers = ThreadPool(self.num_agents)
        agent_idx = 0
        for agent_idx in range(self.num_agents):
            task_args = (self.experiment, agent_idx)
            workers.apply_async(self.run_experiement, args=task_args)

        workers.close()
        workers.join()

Exemple #30

0

Afficher le fichier

Fichier : tempobj.py Projet : hitflame/aliyun-odps-python-sdk

    def cleanup(self, odps):
        """
        Drop every object in ``self._container`` and persist what is left.

        Objects are dropped concurrently; the ones dropped successfully are
        removed from the container. When the container ends up empty the
        backing file ``self._file_name`` is deleted, otherwise the remaining
        objects are written back through ``self.dump()``.

        :param odps: passed to each object's ``drop`` call.
        """
        cleaned = []

        def cleaner_thread(obj):
            try:
                obj.drop(odps)
                cleaned.append(obj)
            except Exception:
                # Best effort: an object that fails to drop simply stays in
                # the container.
                pass

        if self._container:
            # The pool is created only when there is work. It used to be
            # created unconditionally and was never closed for an empty
            # container.
            pool = ThreadPool(CLEANER_THREADS)
            try:
                pool.map(cleaner_thread, self._container)
            finally:
                pool.close()
                pool.join()
        for obj in cleaned:
            if obj in self._container:
                self._container.remove(obj)
        if not self._container:
            try:
                os.unlink(self._file_name)
            except OSError:
                # A backing file that cannot be removed (e.g. already gone)
                # is ignored.
                pass
        else:
            self.dump()

Exemple #31

0

Afficher le fichier

    def __convert_dataset_to_coco(self,
                                  dataset: entities.Dataset,
                                  local_path,
                                  filters=None,
                                  annotation_filter=None):
        """
        Convert a dataset to a COCO-style dict and write it to ``coco.json``.

        The dataset annotations are downloaded below ``local_path``; every
        item listed with ``filters`` is then converted by
        ``__single_item_to_coco`` on a thread pool. Category ids are the
        positions of the label tags in the sorted unique tag list, skipping
        the tags "done", "completed" and "approved".

        :param dataset: dataset whose items, labels and annotations are used.
        :param local_path: directory for the downloaded annotations and for
            the resulting ``coco.json``.
        :param filters: forwarded to ``dataset.items.list``.
        :param annotation_filter: forwarded to every item conversion.
        :returns: dict with the keys 'images', 'annotations', 'categories'.
        """
        pages = dataset.items.list(filters=filters)
        dataset.download_annotations(local_path=local_path)
        path_to_dataloop_annotations_dir = os.path.join(local_path, 'json')

        class_list = np.unique(np.array([label.tag for label in dataset.labels]))

        skipped_tags = ["done", 'completed', 'approved']
        label_to_id = {}
        for index, name in enumerate(class_list):
            if name not in skipped_tags:
                label_to_id[name] = index
        categories = [{'id': index, 'name': name}
                      for name, index in label_to_id.items()]

        # One slot per item; presumably filled in by the workers at the
        # position given by item_id - TODO confirm in __single_item_to_coco.
        images = [None] * pages.items_count
        converted_annotations = [None] * pages.items_count
        pool = ThreadPool(processes=11)
        pbar = tqdm.tqdm(total=pages.items_count)
        all_items = (item for page in pages for item in page)
        for item_id, item in enumerate(all_items):
            pool.apply_async(
                func=self.__single_item_to_coco,
                kwds=dict(
                    item=item,
                    images=images,
                    path_to_dataloop_annotations_dir=path_to_dataloop_annotations_dir,
                    item_id=item_id,
                    converted_annotations=converted_annotations,
                    annotation_filter=annotation_filter,
                    label_to_id=label_to_id,
                    pbar=pbar,
                ))

        pool.close()
        pool.join()
        pool.terminate()
        pbar.close()

        # Slots still holding None are dropped; the rest is flattened.
        coco_json = {
            'images': [image for image in images if image is not None],
            'annotations': [annotation
                            for per_item in converted_annotations
                            if per_item is not None
                            for annotation in per_item],
            'categories': categories
        }

        with open(os.path.join(local_path, 'coco.json'), 'w+') as f:
            json.dump(coco_json, f)

        return coco_json

Exemple #32

0

Afficher le fichier

Fichier : api_client.py Projet : saiury/mbed-cloud-sdk-python

class ApiClient(object):
    """
    Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to
        the API.
    :param cookie: a cookie to include in the header when making calls
        to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if PY3 else long,
        'float': float,
        'str': str,
        'bool': bool,
        'date': date,
        'datetime': datetime,
        'object': object,
    }

    def __init__(self,
                 configuration=None,
                 header_name=None,
                 header_value=None,
                 cookie=None):
        """
        Create the client's configuration, worker pool and REST client.

        :param configuration: configuration object; a fresh
            ``Configuration()`` is created when omitted.
        :param header_name: optional name of a header stored in
            ``default_headers``.
        :param header_value: value stored for ``header_name``.
        :param cookie: cookie value kept on the instance.
        """
        self.configuration = (Configuration()
                              if configuration is None else configuration)

        self.pool = ThreadPool()
        self.rest_client = RESTClientObject(self.configuration)
        self.default_headers = ({} if header_name is None
                                else {header_name: header_value})
        self.cookie = cookie
        # Assigned through the ``user_agent`` property, which stores the
        # value under the 'User-Agent' key of ``default_headers``.
        self.user_agent = 'Swagger-Codegen/1.0.0/python'
        ########### Change
        # Metadata describing the most recent API call.
        self.last_metadata = {}
        ########### End Change

    def __del__(self):
        """Close the worker pool and wait for its threads on destruction."""
        # __del__ also runs for instances whose __init__ raised before
        # ``self.pool`` was assigned; there is nothing to shut down then.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.close()
            pool.join()

    @property
    def user_agent(self):
        """Value of the 'User-Agent' entry in the default headers."""
        headers = self.default_headers
        return headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        """Store *value* as the 'User-Agent' entry of the default headers."""
        self.default_headers.update({'User-Agent': value})

    def set_default_header(self, header_name, header_value):
        """Add or overwrite one entry of ``default_headers``."""
        self.default_headers.update({header_name: header_value})

    ########### Change
    def metadata_wrapper(fn):
        """
        Decorate a method so that metadata of each call is recorded.

        Before the wrapped method runs, ``self.last_metadata`` is replaced
        by a new dict holding ``url`` (configured host plus the first
        positional argument), ``method`` (second positional argument) and
        ``timestamp``.  If the wrapped method raises, the exception is
        stored under ``exception`` and re-raised.
        """
        @functools.wraps(fn)
        def wrapped_f(self, *args, **kwargs):
            # Reset first so stale data never survives into the new call.
            meta = self.last_metadata = {}
            meta["url"] = self.configuration.host + args[0]
            meta["method"] = args[1]
            meta["timestamp"] = time.time()
            try:
                outcome = fn(self, *args, **kwargs)
            except Exception as err:
                self.last_metadata["exception"] = err
                raise
            return outcome

        return wrapped_f

    def get_last_metadata(self):
        """Return the metadata dict recorded for the most recent API call."""
        metadata = self.last_metadata
        return metadata

    ########### End Change

    @metadata_wrapper
    def __call_api(self,
                   resource_path,
                   method,
                   path_params=None,
                   query_params=None,
                   header_params=None,
                   body=None,
                   post_params=None,
                   files=None,
                   response_type=None,
                   auth_settings=None,
                   _return_http_data_only=None,
                   collection_formats=None,
                   _preload_content=True,
                   _request_timeout=None):
        """
        Prepare the parameters, send one request and process the response.

        :param resource_path: path appended to ``configuration.host``;
            ``{name}`` placeholders are filled from ``path_params``.
        :param method: HTTP method, passed on to ``self.request``.
        :param path_params: values substituted (URL-quoted) into
            ``resource_path``.
        :param query_params: query parameters; sanitized and converted with
            ``parameters_to_tuples`` when truthy.
        :param header_params: request headers; a non-empty dict passed here
            is updated in place with ``self.default_headers``.
        :param body: request body; sanitized when truthy.
        :param post_params: form parameters, combined with ``files`` by
            ``prepare_post_parameters``.
        :param files: files handed to ``prepare_post_parameters``.
        :param response_type: type used to deserialize the response when
            ``_preload_content`` is true.
        :param auth_settings: passed to ``update_params_for_auth``.
        :param _return_http_data_only: when truthy only the data is
            returned instead of the 3-tuple.
        :param collection_formats: passed to ``parameters_to_tuples``.
        :param _preload_content: when true the response is deserialized
            (or replaced by ``None`` if no ``response_type`` is given);
            otherwise the raw response object is returned as data.
        :param _request_timeout: passed on to ``self.request``.
        :return: ``return_data`` alone, or the tuple
            ``(return_data, response status, response headers)``.
        """

        config = self.configuration

        # header parameters
        header_params = header_params or {}
        # Default headers win over caller-supplied headers of the same name.
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param))

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        # Its return value is ignored, so it presumably modifies
        # header_params / query_params in place -- TODO confirm.
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method,
                                     url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params,
                                     body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        # Keep the raw response available on the client after the call.
        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        ########### Change
        # Extend the metadata dict of the current call with the raw and
        # the processed response.
        self.last_metadata["response"] = response_data
        self.last_metadata["return_data"] = return_data
        ########### End Change

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """
        Convert *obj* into a structure of JSON-friendly values.

        ``None`` and instances of ``PRIMITIVE_TYPES`` are returned as is.
        Lists and tuples are rebuilt with every element sanitized.
        ``datetime`` / ``date`` objects become ISO 8601 strings.
        Dicts are copied with every value sanitized.
        Any other object is treated as a swagger model: its attributes
        listed in ``swagger_types`` whose value is not ``None`` are
        collected under the JSON keys from ``attribute_map`` and sanitized.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        if isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        if isinstance(obj, list):
            return [self.sanitize_for_serialization(element)
                    for element in obj]
        if isinstance(obj, tuple):
            return tuple(self.sanitize_for_serialization(element)
                         for element in obj)
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            source = obj
        else:
            # Model object: keep only the attributes that are set and
            # rename them to the JSON keys used in requests.
            source = {}
            for attr, _ in iteritems(obj.swagger_types):
                attr_value = getattr(obj, attr)
                if attr_value is not None:
                    source[obj.attribute_map[attr]] = attr_value

        sanitized = {}
        for key, val in iteritems(source):
            sanitized[key] = self.sanitize_for_serialization(val)
        return sanitized

    def deserialize(self, response, response_type):
        """
        Turn a response into an object of the requested type.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        if response_type == "file":
            # File downloads are handled by a dedicated helper.
            return self.__deserialize_file(response)

        raw = response.data
        try:
            payload = json.loads(raw)
        except ValueError:
            # Not valid JSON: hand the raw body to the type conversion.
            payload = raw

        return self.__deserialize(payload, response_type)

    def __deserialize(self, data, klass):
        """
        Deserializes dict, list, str into an object.

        String type names are resolved first: ``list[X]`` and
        ``dict(K, V)`` recurse into their element / value type, names in
        ``NATIVE_TYPES_MAPPING`` map to built-in types, and any other name
        is looked up on the ``models`` module.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if isinstance(klass, str):
            if klass.startswith('list['):
                # Raw strings: '\[' and '\(' are not valid string escapes
                # and trigger a warning in non-raw literals.
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                # Only the value type (group 2) is needed; keys are kept.
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == date:
            return self.__deserialize_date(data)
        elif klass == datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self,
                 resource_path,
                 method,
                 path_params=None,
                 query_params=None,
                 header_params=None,
                 body=None,
                 post_params=None,
                 files=None,
                 response_type=None,
                 auth_settings=None,
                 async=None,
                 _return_http_data_only=None,
                 collection_formats=None,
                 _preload_content=True,
                 _request_timeout=None):

Exemple #33

0

Afficher le fichier

Fichier : athena_ctas_etl.py Projet : mknav2011/backup_biz_data

    def execute_sqls_threaded(self, sql_queries, thread_pool_size=5):
        """ Execute a list of SQL queries concurrently on a thread pool.

        Every query is passed to
        ``self.start_query_execution_and_wait_for_completion`` exactly once.
        A query counts as failed when that call raises, returns ``None``,
        returns a mapping whose ``"SUCCESS"`` entry equals ``False``, or
        returns something whose ``"QUERY"`` entry cannot be read.

        Parameters:
        sql_queries array of SQL queries to execute
        thread_pool_size pool size to use, MAX a/c limit in PROD is 50 so its recommended to keep it around 2-5.
            Values below 1 are treated as 1; the pool is never larger than
            the number of queries.

        Returns:
        True if all SQLs have been executed successfully (also for an
        empty ``sql_queries``)

        Raises:
        Exception("Operation had errors") if at least one query failed
        """
        if len(sql_queries) == 0:
            return True

        start_time = time.time()

        OPERATIONS = len(sql_queries)
        POOL_SIZE = min(max(thread_pool_size, 1), OPERATIONS)
        # Make the Pool of workers
        pool = ThreadPool(POOL_SIZE)

        print("Using pool size of {}".format(POOL_SIZE))

        count = 0
        failed_count = 0
        try:
            results = pool.imap_unordered(
                self.start_query_execution_and_wait_for_completion,
                sql_queries)
            while True:
                # Pull results one at a time: an exception raised by a
                # worker surfaces from next() for that item only, and the
                # iterator keeps delivering the remaining results.  This
                # avoids restarting the whole batch (and re-running
                # queries) after a single failure.
                try:
                    r = next(results)
                except StopIteration:
                    break
                except Exception as e:
                    print(str(e))
                    failed_count += 1
                else:
                    try:
                        if r is None:
                            failed_count += 1
                        elif "SUCCESS" in r and r["SUCCESS"] == False:  # noqa: E712
                            failed_count += 1
                        else:
                            print(r["QUERY"])
                            count += 1
                    except Exception as e:
                        # Result of an unexpected shape: count as failed.
                        print("#", str(e))
                        failed_count += 1
                sys.stderr.write(
                    '\r{0:%} completed {1}, failed {2}, TOTAL: {3}'.format((count * 1.0 / OPERATIONS), count,
                                                                           failed_count, OPERATIONS))

            print("test_threaded_metric_log --- %s seconds ---for %s get ops using %s threads" % (
                (time.time() - start_time), OPERATIONS, POOL_SIZE))
            print("total: " + str(OPERATIONS) + ", failed: " + str(failed_count))
        finally:
            # close the pool and wait for the work to finish, even when
            # something unexpected went wrong above
            pool.close()
            pool.join()

        if count == OPERATIONS and failed_count == 0:
            print("Operation successful")
            return True
        else:
            print("Operation had errors")
            raise Exception("Operation had errors")

Exemple #34

0

Afficher le fichier

def runAllRuns(params,threadCount=4):
    """Apply ``execExpr`` to every element of *params* on a thread pool.

    :param params: iterable of arguments, one per ``execExpr`` call.
    :param threadCount: number of worker threads in the pool.
    :return: list of results, in the order of *params*.
    """
    pool = ThreadPool(threadCount)
    try:
        return pool.map(execExpr, params)
    finally:
        # Always release the worker threads, also when a task raised.
        # (The original wrote ``pool.join`` without parentheses, so the
        # join was never actually performed.)
        pool.close()
        pool.join()

Exemple #35

0

Afficher le fichier

Fichier : PrepareSandboxInfraOperation.py Projet : nahumtimerman/Azure-Shell

    def prepare_connectivity(self, reservation, cloud_provider_model,
                             storage_client, resource_client, network_client,
                             logger, actions, cancellation_context):
        """
        Prepare the sandbox infrastructure for a reservation.

        Creates a resource group, starts storage account / key pair creation
        on a worker thread, looks up the management and sandbox vNets,
        creates a network security group with management rules and a subnet,
        then waits for the background work and builds the results.
        Cancellation is checked between the steps.

        :param logging.Logger logger:
        :param actions: list[cloudshell.cp.core.models.RequestActionBase]
        :param network_client:
        :param storage_client:
        :param resource_client:
        :param cloudshell.cp.azure.models.reservation_model.ReservationModel reservation:
        :param cloudshell.cp.azure.models.azure_cloud_provider_resource_model.AzureCloudProviderResourceModel cloud_provider_model:cloud provider
        :param cancellation_context cloudshell.shell.core.driver_context.CancellationContext instance
        :return: the value of ``self._prepare_results(...)``
        """
        cidr = self._validate_request_and_extract_cidr(actions)
        logger.info("Received CIDR {0} from server".format(cidr))

        # The reservation id doubles as resource group name and subnet name.
        reservation_id = reservation.reservation_id
        group_name = str(reservation_id)
        subnet_name = group_name
        tags = self.tags_service.get_tags(reservation=reservation)
        # Handed to the background task below and to _prepare_results at
        # the end.
        create_key_action_result = CreateKeysActionResult()

        # 1. Create a resource group
        logger.info("Creating a resource group: {0} .".format(group_name))
        self.vm_service.create_resource_group(
            resource_management_client=resource_client,
            group_name=group_name,
            region=cloud_provider_model.region,
            tags=tags)

        self.cancellation_service.check_if_cancelled(cancellation_context)
        storage_account_name = self._prepare_storage_account_name(
            reservation_id)

        # 2+3. create storage account and keypairs (async)
        # Runs on a pool thread while the network steps below proceed.
        pool = ThreadPool()
        storage_res = pool.apply_async(
            self._create_storage_and_keypairs,
            (logger, storage_client, storage_account_name, group_name,
             cloud_provider_model, tags, cancellation_context,
             create_key_action_result))

        logger.info(
            "Retrieving MGMT vNet from resource group {} by tag {}={}".format(
                cloud_provider_model.management_group_name,
                NetworkService.NETWORK_TYPE_TAG_NAME,
                NetworkService.MGMT_NETWORK_TAG_VALUE))

        # Both the management and the sandbox vNet are selected from this
        # single listing of the management resource group.
        virtual_networks = self.network_service.get_virtual_networks(
            network_client=network_client,
            group_name=cloud_provider_model.management_group_name)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        management_vnet = self.network_service.get_virtual_network_by_tag(
            virtual_networks=virtual_networks,
            tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
            tag_value=NetworkService.MGMT_NETWORK_TAG_VALUE)

        self._validate_management_vnet(management_vnet)

        logger.info(
            "Retrieving sandbox vNet from resource group {} by tag {}={}".
            format(cloud_provider_model.management_group_name,
                   NetworkService.NETWORK_TYPE_TAG_NAME,
                   NetworkService.SANDBOX_NETWORK_TAG_VALUE))

        sandbox_vnet = self.network_service.get_virtual_network_by_tag(
            virtual_networks=virtual_networks,
            tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
            tag_value=NetworkService.SANDBOX_NETWORK_TAG_VALUE)

        self._validate_sandbox_vnet(sandbox_vnet)

        # 4. Create the NSG object
        security_group_name = reservation_id
        logger.info("Creating a network security group '{}' .".format(
            security_group_name))
        network_security_group = self.security_group_service.create_network_security_group(
            network_client=network_client,
            group_name=group_name,
            security_group_name=security_group_name,
            region=cloud_provider_model.region,
            tags=tags)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        logger.info("Creating NSG management rules...")
        # 5. Set rules on NSG to create a sandbox
        self._create_management_rules(
            group_name=group_name,
            management_vnet=management_vnet,
            network_client=network_client,
            sandbox_vnet_cidr=cidr,
            security_group_name=security_group_name,
            additional_mgmt_networks=cloud_provider_model.
            additional_mgmt_networks,
            logger=logger)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        # 6. Create a subnet with NSG
        self._create_subnet(cidr=cidr,
                            cloud_provider_model=cloud_provider_model,
                            logger=logger,
                            network_client=network_client,
                            resource_client=resource_client,
                            network_security_group=network_security_group,
                            sandbox_vnet=sandbox_vnet,
                            subnet_name=subnet_name)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        # wait for all async operations
        # NOTE(review): join() blocks until the worker threads have exited,
        # so the task has already finished when get() is called and the
        # 900 s timeout below looks like it can never expire -- confirm
        # whether get() was meant to run before join().
        # NOTE(review): if any step above raises, close()/join() are not
        # reached; verify whether the pool should be released in that case.
        pool.close()
        pool.join()
        # get() re-raises an exception raised by the background task.
        storage_res.get(
            timeout=900
        )  # will wait for 15 min and raise exception if storage account creation failed

        return self._prepare_results(create_key_action_result, actions)

Exemple #36

0

Afficher le fichier

Fichier : parallelizedWriteTest.py Projet : rohan-sahgal/URL-Shortener

        request = "http://127.0.0.1:8000/"
        r = requests.put(request,
                         params={
                             'short': str(i),
                             'long': longResource
                         })
    except Exception as e:
        print(e)


# Batch sizes to benchmark: each entry is the number of write tasks that are
# submitted to a fresh pool and timed as one run.
numWrites = [10, 100, 1000, 4000]

# newline='' lets the csv module control line endings itself; without it
# rows can end in '\r\r\n' on platforms that translate '\n' on write.
with open('./data/varying_writes.tsv', 'wt', newline='') as out_file:
    # One writer for the whole file instead of a new one per batch.
    tsv_writer = csv.writer(out_file, delimiter='\t')

    for num_writes in numWrites:

        pool = Pool(pool_size)
        t0 = time.time()

        for j in range(num_writes):
            pool.apply_async(worker, (j, ))

        # Wait until every submitted write has finished before stopping
        # the clock.
        pool.close()
        pool.join()

        t1 = time.time()

        # One row per batch: elapsed seconds, number of writes.
        tsv_writer.writerow([t1 - t0, num_writes])
        print("{} writes: {} seconds".format(num_writes, t1 - t0))

Exemple #37

0

Afficher le fichier

    def convert_dataset(self,
                        dataset,
                        to_format,
                        local_path,
                        conversion_func=None,
                        filters=None,
                        annotation_filter=None):
        """
        Convert the annotations of every item in a dataset.

        A ``to_format`` of 'coco' (case-insensitive) is delegated to the
        dedicated COCO conversion and its result is returned.  For any
        other format one conversion task per item is submitted to a thread
        pool and the method waits for all of them; nothing is returned.

        :param annotation_filter: when None the dataset annotations are
            downloaded first and ``convert_file`` is used per item;
            otherwise the filtered-annotations conversion is used.
        :param dataset: entities.Dataset to convert
        :param to_format: name of the target format
        :param local_path: directory holding the "json" input folder and
            receiving the ``to_format`` output folder
        :param conversion_func: Custom conversion service
        :param filters: optional, passed to ``dataset.items.list``
        :return: the COCO conversion result for 'coco', otherwise None
        """
        if to_format.lower() == 'coco':
            return self.__convert_dataset_to_coco(
                dataset=dataset,
                local_path=local_path,
                filters=filters,
                annotation_filter=annotation_filter)

        num_workers = 6
        assert isinstance(dataset, entities.Dataset)
        self.dataset = dataset

        # Without an annotation filter the raw annotations are fetched
        # up front.
        if annotation_filter is None:
            dataset.download_annotations(local_path=local_path, overwrite=True)
        local_annotations_path = os.path.join(local_path, "json")
        output_annotations_path = os.path.join(local_path, to_format)
        pool = ThreadPool(processes=num_workers)
        pages = dataset.items.list(filters=filters)

        # yolo additionally needs a file listing the label names
        if to_format == 'yolo':
            labels = [label.tag for label in dataset.labels]
            names_path = '{}/{}.names'.format(local_path, dataset.name)
            with open(names_path, 'w') as fp:
                fp.writelines("{}\n".format(label) for label in labels)

        pbar = tqdm.tqdm(total=pages.items_count)
        for page in pages:
            for item in page:
                # Input json path: the item filename without its first
                # character, with the extension replaced by '.json'.
                stem, _ = os.path.splitext(
                    os.path.join(local_annotations_path, item.filename[1:]))
                in_filepath = stem + '.json'

                # Mirror the input folder layout under the output folder.
                out_filepath = in_filepath.replace(local_annotations_path,
                                                   output_annotations_path)
                save_to = os.path.dirname(out_filepath)
                if not os.path.isdir(save_to):
                    os.makedirs(save_to, exist_ok=True)

                # A separate converter per item, configured like self.
                converter = utilities.Converter()
                converter.dataset = self.dataset
                converter.save_to_format = self.save_to_format
                converter.xml_template_path = self.xml_template_path

                method = (converter.convert_file
                          if annotation_filter is None else
                          converter.__save_filtered_annotations_and_convert)

                pool.apply_async(func=method,
                                 kwds=dict(to_format=to_format,
                                           from_format='dataloop',
                                           file_path=in_filepath,
                                           save_locally=True,
                                           save_to=save_to,
                                           conversion_func=conversion_func,
                                           item=item,
                                           pbar=pbar,
                                           filters=annotation_filter))
        pool.close()
        pool.join()
        pool.terminate()
        pbar.close()

Exemple #38

0

Afficher le fichier

class ApiClient(object):
    """Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to
        the API.
    :param cookie: a cookie to include in the header when making calls
        to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if six.PY3 else long,  # noqa: F821
        'float': float,
        'str': str,
        'bool': bool,
        'date': datetime.date,
        'datetime': datetime.datetime,
        'object': object,
    }

    def __init__(self,
                 configuration=None,
                 header_name=None,
                 header_value=None,
                 cookie=None):
        """Create the client's configuration, worker pool and REST client.

        :param configuration: configuration object; a fresh
            ``Configuration()`` is created when omitted.
        :param header_name: optional name of a header stored in
            ``default_headers``.
        :param header_value: value stored for ``header_name``.
        :param cookie: cookie value kept on the instance.
        """
        self.configuration = (Configuration()
                              if configuration is None else configuration)

        self.pool = ThreadPool()
        self.rest_client = rest.RESTClientObject(self.configuration)
        self.default_headers = ({} if header_name is None
                                else {header_name: header_value})
        self.cookie = cookie
        # Assigned through the ``user_agent`` property, which stores the
        # value under the 'User-Agent' key of ``default_headers``.
        self.user_agent = 'Swagger-Codegen/0.2.3/python'
        # Prefix marking a path parameter tagged with
        # x-isi-url-encode-path-param; __call_api checks for it and
        # encodes such values with quote_plus.
        tag = "__x-isi-url-encode-path-param__"
        self.quote_plus_tag = tag
        self.quote_plus_tag_len = len(tag)

        self.session_expiration = 0
        self.inactive_expiration = 0
        self.x_csrf_token = None

    def __del__(self):
        """Close the worker pool and wait for its threads on destruction."""
        # __del__ also runs for instances whose __init__ raised before
        # ``self.pool`` was assigned; there is nothing to shut down then.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.close()
            pool.join()

    @property
    def user_agent(self):
        """Value of the 'User-Agent' entry in the default headers."""
        headers = self.default_headers
        return headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        """Store *value* as the 'User-Agent' entry of the default headers."""
        self.default_headers.update({'User-Agent': value})

    def set_default_header(self, header_name, header_value):
        """Add or overwrite one entry of ``default_headers``."""
        self.default_headers.update({header_name: header_value})

    def __call_api(self,
                   resource_path,
                   method,
                   path_params=None,
                   query_params=None,
                   header_params=None,
                   body=None,
                   post_params=None,
                   files=None,
                   response_type=None,
                   auth_settings=None,
                   _return_http_data_only=None,
                   collection_formats=None,
                   _preload_content=True,
                   _request_timeout=None):
        """Prepare the parameters, send one request and process the response.

        :param resource_path: path appended to ``configuration.host``;
            ``{name}`` placeholders are filled from ``path_params``.
        :param method: HTTP method, passed on to ``self.request``.
        :param path_params: values substituted (URL-encoded) into
            ``resource_path``; values starting with ``self.quote_plus_tag``
            are encoded with ``quote_plus`` after the tag is removed.
        :param query_params: query parameters; sanitized and converted with
            ``parameters_to_tuples`` when truthy.
        :param header_params: request headers; a non-empty dict passed here
            is updated in place with ``self.default_headers``.
        :param body: request body; sanitized when truthy.
        :param post_params: form parameters, combined with ``files`` by
            ``prepare_post_parameters``.
        :param files: files handed to ``prepare_post_parameters``.
        :param response_type: type used to deserialize the response when
            ``_preload_content`` is true.
        :param auth_settings: passed to ``update_params_for_auth``.
        :param _return_http_data_only: when truthy only the data is
            returned instead of the 3-tuple.
        :param collection_formats: passed to ``parameters_to_tuples``.
        :param _preload_content: when true the response is deserialized
            (or replaced by ``None`` if no ``response_type`` is given);
            otherwise the raw response object is returned as data.
        :param _request_timeout: passed on to ``self.request``.
        :return: ``return_data`` alone, or the tuple
            ``(return_data, response status, response headers)``.
        """

        config = self.configuration

        # header parameters
        header_params = header_params or {}
        # Default headers win over caller-supplied headers of the same name.
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                v_str = str(v)
                # Check for the special case of the
                # x-isi-url-encode-path-param, which indicates that the
                # parameter should be encoded with quote_plus in order
                # to encode the '/' character.
                # check if the first part of v_str matches the tag
                if v_str[:self.quote_plus_tag_len] == self.quote_plus_tag:
                    # remove "__x-isi-url-encode-path-param__"
                    v_str = v_str[self.quote_plus_tag_len:]
                    # then url-encode with quote_plus
                    replacement = quote_plus(v_str)
                else:
                    replacement = quote(v_str,
                                        safe=config.safe_chars_for_path_param)

                # specified safe chars, encode everything
                resource_path = resource_path.replace('{%s}' % k, replacement)

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        # Skipped for hosts using the 'papi://' scheme.  The return value is
        # ignored, so the call presumably modifies header_params /
        # query_params in place -- TODO confirm.
        if not self.configuration.host.startswith('papi://'):
            self.update_params_for_auth(header_params, query_params,
                                        auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method,
                                     url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params,
                                     body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        # Keep the raw response available on the client after the call.
        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """Convert *obj* into a structure of JSON-friendly values.

        ``None`` and instances of ``PRIMITIVE_TYPES`` are returned as is.
        Lists and tuples are rebuilt with every element sanitized.
        ``datetime.datetime`` / ``datetime.date`` objects become ISO 8601
        strings.
        Dicts are copied with every value sanitized.
        Any other object is treated as a swagger model: its attributes
        listed in ``swagger_types`` whose value is not ``None`` are
        collected under the JSON keys from ``attribute_map`` and sanitized.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        if isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        if isinstance(obj, list):
            return [self.sanitize_for_serialization(element)
                    for element in obj]
        if isinstance(obj, tuple):
            return tuple(self.sanitize_for_serialization(element)
                         for element in obj)
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            source = obj
        else:
            # Model object: keep only the attributes that are set and
            # rename them to the JSON keys used in requests.
            source = {}
            for attr, _ in six.iteritems(obj.swagger_types):
                attr_value = getattr(obj, attr)
                if attr_value is not None:
                    source[obj.attribute_map[attr]] = attr_value

        sanitized = {}
        for key, val in six.iteritems(source):
            sanitized[key] = self.sanitize_for_serialization(val)
        return sanitized

    def deserialize(self, response, response_type):
        """Turn a REST response into an object of the requested type.

        :param response: RESTResponse object to be deserialized; its
            ``data`` attribute holds the raw body.
        :param response_type: class literal for the deserialized object,
            or string of class name. The special value ``"file"`` makes
            the file-download deserializer handle the response.

        :return: deserialized object.
        """
        # File downloads are handled by a dedicated deserializer that
        # receives the whole response.
        if response_type == "file":
            return self.__deserialize_file(response)

        # Decode the body as JSON when possible; otherwise pass the raw
        # body through unchanged.
        raw = response.data
        try:
            payload = json.loads(raw)
        except ValueError:
            payload = raw

        return self.__deserialize(payload, response_type)

    def __deserialize(self, data, klass):
        """Deserializes dict, list, str into an object.

        ``klass`` may be a class or a string naming one. String forms are
        resolved as follows:

        * ``'list[X]'``: ``data`` is iterated and each element is
          deserialized as ``X``.
        * ``'dict(K, V)'``: each value of ``data`` is deserialized as ``V``
          (keys are kept as they are).
        * a key of ``self.NATIVE_TYPES_MAPPING``: the mapped class.
        * anything else: looked up as an attribute of
          ``isi_sdk_8_0.models``.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object, or ``None`` when ``data`` is ``None``.
        """
        if data is None:
            return None

        if isinstance(klass, str):
            if klass.startswith('list['):
                # Raw string: '\[' is a regex escape, not a string escape.
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                # Only the value type (group 2) is needed.
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in six.iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(isi_sdk_8_0.models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == datetime.date:
            return self.__deserialize_date(data)
        elif klass == datetime.datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self,
                 resource_path,
                 method,
                 path_params=None,
                 query_params=None,
                 header_params=None,
                 body=None,
                 post_params=None,
                 files=None,
                 response_type=None,
                 auth_settings=None,
                 async=None,
                 _return_http_data_only=None,
                 collection_formats=None,
                 _preload_content=True,
                 _request_timeout=None):

Exemple #39

0

Afficher le fichier

Fichier : geoparse.py Projet : zcdzcdzcd/mordecai

    def geoparse(self, doc, verbose=False):
        """Main geoparsing function. Text to extracted, resolved entities.

        Parameters
        ----------
        doc : str or spaCy
            The document to be geoparsed. Can be either raw text or already spacy processed.
            In some cases, it makes sense to bulk parse using spacy's .pipe() before sending
            through to Mordecai
        verbose : bool
            When False (the default) the result is passed through
            ``self.clean_proced`` before being returned.

        Returns
        -------
        proced : list of dicts
            Each entity gets an entry in the list, with the dictionary including geo info, spans,
            and optionally, the input features. An empty list is returned when
            ``self.infer_country`` yields nothing.
        """
        if not hasattr(doc, "ents"):
            doc = nlp(doc)
        proced = self.infer_country(doc)
        if not proced:
            return []

        if self.threads:
            pool = ThreadPool(len(proced))
            try:
                results = pool.map(self.proc_lookup_country, proced)
            finally:
                # Always release the worker threads, even when a lookup
                # raises; otherwise every failed call leaks a pool.
                pool.close()
                pool.join()
        else:
            results = []
            for loc in proced:
                # if the confidence is too low, don't use the country info
                if loc['country_conf'] > self.country_threshold:
                    res = self.query_geonames_country(loc['word'],
                                                      loc['country_predicted'])
                    results.append(res)
                else:
                    results.append("")

        for loc, res in zip(proced, results):
            try:
                res['hits']['hits']
                # If there's no geonames result, what to do?
                # For now, just continue.
                # In the future, delete? Or add an empty "loc" field?
            except (TypeError, KeyError):
                continue
            # Pick the best place
            X, meta = self.features_for_rank(loc, res)
            if X.shape[1] == 0:
                # This happens if there are no results...
                continue
            _, sorted_meta, sorted_X = self.format_for_prodigy(
                X, meta, loc['word'], return_feature_subset=True)
            # Zero-pad the feature rows up to 4 rows, then flatten them into
            # a single row for the ranking model.
            fl_pad = np.pad(sorted_X, ((0, 4 - sorted_X.shape[0]), (0, 0)),
                            'constant')
            fl_unwrap = fl_pad.flatten()
            prediction = self.rank_model.predict(np.asmatrix(fl_unwrap))
            place_confidence = prediction.max()
            loc['geo'] = sorted_meta[prediction.argmax()]
            loc['place_confidence'] = place_confidence
        if not verbose:
            proced = self.clean_proced(proced)
        return proced

Exemple #40

0

Afficher le fichier

Fichier : hxtool_scheduler.py Projet : stevenacalhoun/HXTool

class hxtool_scheduler:
	"""Task scheduler that polls a task queue and runs due tasks on a thread pool.

	Attributes:
		task_queue: dict mapping task_id -> task object, guarded by ``_lock``.
		history_queue: dict mapping task_id -> task metadata for tasks moved
			out of the queue via ``move_to_history``.
		task_hx_api_sessions: dict mapping profile_id -> HXAPI session object.
	"""
	def __init__(self, thread_count = None):
		"""Create the poll thread and worker pool; the poll thread is not started here.

		:param thread_count: number of worker threads; defaults to cpu_count() + 1.
		"""
		self._lock = threading.Lock()
		self.task_queue = {}
		self.history_queue = {}
		self.task_hx_api_sessions = {}
		self._poll_thread = threading.Thread(target = self._scan_task_queue, name = "PollThread")
		self._stop_event = threading.Event()
		# Allow for thread oversubscription based on CPU count
		self.thread_count = thread_count or (cpu_count() + 1)
		self.task_threads = ThreadPool(self.thread_count)
		logger.info("Task scheduler initialized.")


	def _scan_task_queue(self):
		"""Poll loop: every 0.1s submit all tasks whose should_run() is true, until stopped."""
		while not self._stop_event.wait(.1):
			ret = None
			with self._lock:
				ret = self.task_threads.imap_unordered(self._run_task, [t for t in self.task_queue.values() if t.should_run()])
			if ret:
				# Drain the results so exceptions raised by tasks get logged.
				while not self._stop_event.is_set():
					try:
						ret.next(timeout=5)
					except (TimeoutError, StopIteration):
						break
					except Exception as e:
						logger.error(pretty_exceptions(e))
						continue

	def _run_task(self, task):
		"""Run a single task on a worker thread.

		:return: the value of ``task.run(self)``, or False if it raised an Exception
			(in which case the task is marked TASK_STATE_FAILED).
		"""
		ret = False
		task.set_state(TASK_STATE_QUEUED)
		logger.debug("Executing task with id: %s, name: %s.", task.task_id, task.name)
		try:
			ret = task.run(self)
		except Exception as e:
			logger.error(pretty_exceptions(e))
			task.set_state(TASK_STATE_FAILED)
		# Returned after the try block rather than from a `finally`, which
		# would silently discard any in-flight exception (even SystemExit).
		return ret

	def _add_task_api_task(self, profile_id, hx_host, hx_port, username, password):
		"""Create the HXAPI session object for a profile and queue its login task."""
		self.task_hx_api_sessions[profile_id] = HXAPI(
			hx_host,
			hx_port = hx_port,
			proxies = hxtool_global.hxtool_config['network'].get('proxies'),
			headers = hxtool_global.hxtool_config['headers'],
			cookies = hxtool_global.hxtool_config['cookies'],
			logger_name = hxtool_logging.getLoggerName(HXAPI.__name__),
			default_encoding = default_encoding)
		api_login_task = hxtool_scheduler_task(profile_id, "Task API Login - {}".format(hx_host), immutable = True)
		api_login_task.add_step(hxtool_task_modules.task_api_session_module, kwargs = {
			'profile_id' : profile_id,
			'username' : username,
			'password' : password
		})
		self.add(api_login_task)

	def start(self):
		"""Start the poll thread."""
		self._poll_thread.start()
		logger.info("Task scheduler started with %s threads.", self.thread_count)

	def stop(self):
		"""Signal the poll loop to stop, then close the worker pool and wait for it."""
		logger.debug("stop() enter.")
		self._stop_event.set()
		logger.debug("Closing the task thread pool.")
		self.task_threads.close()
		logger.debug("Waiting for running threads to terminate.")
		self.task_threads.join()
		logger.debug("stop() exit.")

	def initialize_task_api_sessions(self):
		"""Start an API session for every profile that has a stored background credential."""
		# Loop through background credentials and start the API sessions
		profiles = hxtool_global.hxtool_db.profileList()
		for profile in profiles:
			task_api_credential = hxtool_global.hxtool_db.backgroundProcessorCredentialGet(profile['profile_id'])
			if task_api_credential:
				try:
					salt = HXAPI.b64(task_api_credential['salt'], True)
					iv = HXAPI.b64(task_api_credential['iv'], True)
					key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
					decrypted_background_password = crypt_aes(key, iv, task_api_credential['hx_api_encrypted_password'], decrypt = True)
					self._add_task_api_task(profile['profile_id'], profile['hx_host'], profile['hx_port'], task_api_credential['hx_api_username'], decrypted_background_password)
					decrypted_background_password = None
				except UnicodeDecodeError:
					logger.error("Please reset the background credential for {} ({}).".format(profile['hx_host'], profile['profile_id']))
			else:
				logger.info("No background credential for {} ({}).".format(profile['hx_host'], profile['profile_id']))

	def add_task_api_session(self, profile_id, hx_host, hx_port, username, password):
		"""Encrypt and store a background credential, then start its API session."""
		iv = crypt_generate_random(16)
		salt = crypt_generate_random(32)
		key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
		encrypted_password = crypt_aes(key, iv, password)
		hxtool_global.hxtool_db.backgroundProcessorCredentialCreate(profile_id, username, HXAPI.b64(iv), HXAPI.b64(salt), encrypted_password)
		encrypted_password = None
		self._add_task_api_task(profile_id, hx_host, hx_port, username, password)
		password = None

	def remove_task_api_session(self, profile_id):
		"""Delete the stored background credential; log out and forget the session if it is still valid."""
		hxtool_global.hxtool_db.backgroundProcessorCredentialRemove(profile_id)
		hx_api_object = self.task_hx_api_sessions.get(profile_id)
		if hx_api_object and hx_api_object.restIsSessionValid():
			hx_api_object.restLogout()
			del self.task_hx_api_sessions[profile_id]

	def logout_task_api_sessions(self):
		"""Log out every tracked API session."""
		for hx_api_object in self.task_hx_api_sessions.values():
			if hx_api_object is not None:
				hx_api_object.restLogout()

	def signal_child_tasks(self, parent_task_id, parent_task_state, parent_stored_result):
		"""Invoke parent_state_callback on every queued task with the parent's id, state and result."""
		with self._lock:
			for task_id in self.task_queue:
				self.task_queue[task_id].parent_state_callback(parent_task_id, parent_task_state, parent_stored_result)

	def add(self, task, should_store = True):
		"""Queue a task and mark it scheduled.

		:param should_store: when true, ``task.store()`` is called as well.
		:return: the task's id.
		"""
		with self._lock:
			self.task_queue[task.task_id] = task
			task.set_state(TASK_STATE_SCHEDULED)
			# Note: this must be within the lock otherwise we run into a nasty race condition where the task runs before the stored state is set -
			# with the run lock taking precedence.
			if should_store:
				task.store()
		return task.task_id

	def add_list(self, tasks):
		"""Queue every task in ``tasks``; anything that is not a list is ignored."""
		if isinstance(tasks, list):
			for t in tasks:
				self.add(t)

	def remove(self, task_id, delete_children=True):
		"""Remove a task from the queue, or failing that from the history.

		Immutable queued tasks are left in place. With ``delete_children``,
		queued and historic tasks whose parent_id equals ``task_id`` are
		removed first.
		"""
		if task_id:
			with self._lock:
				if delete_children:
					# We need to make a shallow copy so we don't modify the task_queue while iterating over it
					for child_task_id in [t.task_id for t in self.task_queue.values() if t.parent_id == task_id]:
						self.task_queue[child_task_id].remove()
						del self.task_queue[child_task_id]

					for child_task_id in [h['task_id'] for h in self.history_queue.values() if h['parent_id'] == task_id]:
						del self.history_queue[child_task_id]

				t = self.task_queue.get(task_id, None)
				if t and not t.immutable:
					t.remove()
					del self.task_queue[task_id]
				elif task_id in self.history_queue:
					del self.history_queue[task_id]

	def get(self, task_id):
		"""Return the queued task with this id, or None."""
		with self._lock:
			return self.task_queue.get(task_id, None)

	def move_to_history(self, task_id):
		"""Move a queued task's metadata into the history, evicting the oldest entries past the cap."""
		with self._lock:
			t = self.task_queue.pop(task_id, None)
			if t is not None:
				self.history_queue[task_id] = t.metadata()
			# Evict the OLDEST entries. dict.popitem() removes the most
			# recently inserted item, which would discard the entry that was
			# just added. Done under the lock so concurrent callers cannot
			# interleave with the trim.
			while len(self.history_queue) > MAX_HISTORY_QUEUE_LENGTH:
				del self.history_queue[next(iter(self.history_queue))]

	def tasks(self):
		"""Return metadata for all queued tasks followed by all history entries."""
		# Shallow copy to avoid locking
		return [t.metadata() for t in list(self.task_queue.values())] + list(self.history_queue.values())

	# Load queued tasks from the database
	def load_from_database(self):
		"""Re-queue tasks saved in the database; orphaned child tasks are deleted instead.

		Does nothing (beyond a warning) unless the poll thread is running.
		"""
		try:
			if self.status():
				tasks = hxtool_global.hxtool_db.taskList()
				for task_entry in tasks:
					p_id = task_entry.get('parent_id', None)
					if p_id and (not task_entry['parent_complete'] and not hxtool_global.hxtool_db.taskGet(task_entry['profile_id'], p_id)):
						logger.warning("Deleting orphan task {}, {}".format(task_entry['name'], task_entry['task_id']))
						hxtool_global.hxtool_db.taskDelete(task_entry['profile_id'], task_entry['task_id'])
					else:
						task = hxtool_scheduler_task.deserialize(task_entry)
						task.set_stored()
						# Set should_store to False as we've already been stored, and we skip a needless update
						self.add(task, should_store = False)
			else:
				logger.warning("Task scheduler must be running before loading queued tasks from the database.")
		except Exception as e:
			logger.error("Failed to load saved tasks from the database. Error: {}".format(pretty_exceptions(e)))

	def status(self):
		"""Return True while the poll thread is alive."""
		return self._poll_thread.is_alive()

Exemple #41

0

Afficher le fichier

Fichier : worker_pool.py Projet : zvikihouzz/pants

class WorkerPool:
    """A pool of workers.

    Workers are threads, and so are subject to GIL constraints. Submitting CPU-bound work
    may not be effective. Use this class primarily for IO-bound work.
    """
    def __init__(self, parent_workunit, run_tracker, num_workers,
                 thread_name_prefix):
        """Create the underlying thread pool.

        :param parent_workunit: workunit every worker thread is registered under.
        :param run_tracker: object providing ``register_thread``, ``log`` and
                            ``new_workunit_under_parent``.
        :param num_workers: number of worker threads.
        :param thread_name_prefix: workers are named ``<prefix>-<counter>``.
        """
        self._run_tracker = run_tracker
        self.thread_lock = threading.Lock()
        self.thread_counter = 0

        def initialize():
            # Runs once in each worker thread: give it a unique name and
            # register it with the run tracker.
            with self.thread_lock:
                threading.current_thread().name = "{}-{}".format(
                    thread_name_prefix, self.thread_counter)
                self.thread_counter += 1
            self._run_tracker.register_thread(parent_workunit)

        # All workers accrue work to the same root.
        self._pool = ThreadPool(
            processes=num_workers,
            initializer=initialize,
        )
        # We mustn't shutdown when there are pending workchains, as they may need to submit work
        # in the future, and the pool doesn't know about this yet.
        self._pending_workchains = 0
        self._pending_workchains_cond = threading.Condition(
        )  # Protects self._pending_workchains.

        self._shutdown_hooks = []

        self.num_workers = num_workers

    def add_shutdown_hook(self, hook):
        """Register a no-arg callable to be invoked at the end of shutdown()."""
        self._shutdown_hooks.append(hook)

    def submit_async_work(self,
                          work,
                          workunit_parent=None,
                          on_success=None,
                          on_failure=None):
        """Submit work to be executed in the background.

        :param work: The work to execute.
        :param workunit_parent: If specified, work is accounted for under this workunit.
        :param on_success: If specified, a callable taking a single argument, which will be a list
                      of return values of each invocation, in order. Called only if all work succeeded.
        :param on_failure: If specified, a callable taking a single argument, which is an exception
                      thrown in the work.

        :return: `multiprocessing.pool.MapResult`, or None when there is no work to
                 run (in which case on_success, if given, is called immediately with []).

        Don't do work in on_success: not only will it block the result handling thread, but
        that thread is not a worker and doesn't have a logging context etc. Use it just to
        submit further work to the pool.
        """
        if work is None or len(
                work.args_tuples
        ) == 0:  # map_async hangs on 0-length iterables.
            if on_success:
                on_success([])
        else:

            def do_work(*args):
                # Return the result so that on_success (and the MapResult)
                # receive the invocations' return values, as documented,
                # rather than a list of None.
                return self._do_work(work.func,
                                     *args,
                                     workunit_name=work.workunit_name,
                                     workunit_parent=workunit_parent,
                                     on_failure=on_failure)

            return self._pool.map_async(do_work,
                                        work.args_tuples,
                                        chunksize=1,
                                        callback=on_success)

    def submit_async_work_chain(self,
                                work_chain,
                                workunit_parent,
                                done_hook=None):
        """Submit work to be executed in the background.

        - work_chain: An iterable of Work instances. Will be invoked serially. Each instance may
                      have a different cardinality. There is no output-input chaining: the argument
                      tuples must already be present in each work instance.  If any work throws an
                      exception no subsequent work in the chain will be attempted.
        - workunit_parent: Work is accounted for under this workunit.
        - done_hook: If not None, invoked with no args after all work is done, or on error.
        """
        def done():
            if done_hook:
                done_hook()
            with self._pending_workchains_cond:
                self._pending_workchains -= 1
                self._pending_workchains_cond.notify()

        def error(e):
            done()
            self._run_tracker.log(Report.ERROR, '{}'.format(e))

        # We filter out Nones defensively. There shouldn't be any, but if a bug causes one,
        # Pants might hang indefinitely without this filtering.
        work_iter = (_f for _f in work_chain if _f)

        def submit_next():
            try:
                self.submit_async_work(next(work_iter),
                                       workunit_parent=workunit_parent,
                                       on_success=lambda x: submit_next(),
                                       on_failure=error)
            except StopIteration:
                done()  # The success case.

        with self._pending_workchains_cond:
            self._pending_workchains += 1
        try:
            submit_next()
        except Exception as e:  # Handles errors in the submission code.
            done()
            self._run_tracker.log(Report.ERROR, '{}'.format(e))
            raise

    def submit_work_and_wait(self, work, workunit_parent=None):
        """Submit work to be executed on this pool, but wait for it to complete.

        - work: The work to execute.
        - workunit_parent: If specified, work is accounted for under this workunit.

        Returns a list of return values of each invocation, in order.  Throws if any invocation does.
        """
        if work is None or len(
                work.args_tuples) == 0:  # map hangs on 0-length iterables.
            return []
        else:

            def do_work(*args):
                return self._do_work(work.func,
                                     *args,
                                     workunit_name=work.workunit_name,
                                     workunit_parent=workunit_parent)

            # We need to specify a timeout explicitly, because otherwise python ignores SIGINT when waiting
            # on a condition variable, so we won't be able to ctrl-c out.
            return self._pool.map_async(do_work, work.args_tuples,
                                        chunksize=1).get(timeout=1000000000)

    def _do_work(self,
                 func,
                 args_tuple,
                 workunit_name,
                 workunit_parent,
                 on_failure=None):
        """Call ``func(*args_tuple)``, inside a new workunit when ``workunit_name`` is set.

        Exceptions are re-raised after ``on_failure`` (if any) has been called with them.
        """
        try:
            if workunit_name:
                with self._run_tracker.new_workunit_under_parent(
                        name=workunit_name, parent=workunit_parent):
                    return func(*args_tuple)
            else:
                return func(*args_tuple)
        except KeyboardInterrupt:
            # If a worker thread intercepts a KeyboardInterrupt, we want to propagate it to the main
            # thread.
            _thread.interrupt_main()
            raise
        except Exception as e:
            if on_failure:
                # Note that here the work's workunit is closed. So, e.g., it's OK to use on_failure()
                # to close an ancestor workunit.
                on_failure(e)
            raise

    def shutdown(self):
        """Wait for pending work chains, close and join the pool, then run the shutdown hooks."""
        with self._pending_workchains_cond:
            while self._pending_workchains > 0:
                self._pending_workchains_cond.wait()
            self._pool.close()
            self._pool.join()
            for hook in self._shutdown_hooks:
                hook()

    def abort(self):
        """Terminate the pool immediately."""
        self._pool.terminate()

Exemple #42

0

Afficher le fichier

Fichier : geoparse.py Projet : zcdzcdzcd/mordecai

    def make_country_features(self, doc, require_maj=False):
        """
        Create features for the country picking model. Function where all the individual
        feature maker functions are called and aggregated. (Formerly "process_text")

        Parameters
        -----------
        doc : str or spaCy doc
        require_maj : bool
            Not used by this function.

        Returns
        -------
        task_list : list of dicts
            Each entry has the word, surrounding text, span, and the country picking features.
            This output can be put into Prodigy for labeling almost as-is (the "features" key needs
            to be renamed "meta" or be deleted.)
        """
        if not hasattr(doc, "ents"):
            doc = nlp(doc)
        # initialize the place to store finalized tasks
        task_list = []

        # get explicit counts of country names
        ct_mention, ctm_count1, ct_mention2, ctm_count2 = self._feature_country_mentions(
            doc)

        #  pull out the place names, skipping empty ones, non-place entity
        #  labels, and known junk from the skip list (like "Atlantic Ocean")
        ents = [
            ent for ent in doc.ents
            if ent.text.strip()
            and ent.label_ in ("GPE", "LOC", "FAC")
            and ent.text.strip() not in self._skip_list
        ]
        if not ents:
            return []

        # Look them up in geonames, either sequentially if no threading, or
        # in parallel if threads.
        if self.threads:
            pool = ThreadPool(len(ents))
            try:
                ent_results = pool.map(self.simple_lookup,
                                       [ent.text for ent in ents])
            finally:
                # Always release the worker threads, even when a lookup
                # raises; otherwise every failed call leaks a pool.
                pool.close()
                pool.join()
        else:
            ent_results = []
            for ent in ents:
                try:
                    result = self.query_geonames(ent.text)
                except ConnectionTimeout:
                    result = ""
                ent_results.append(result)

        for ent, result in zip(ents, ent_results):
            ent = self.clean_entity(ent)

            # vector for just the solo word
            vp = self._feature_word_embedding(ent)
            try:
                word_vec = vp['country_1']
                wv_confid = float(vp['confid_a'])
            except TypeError:
                # no idea why this comes up
                word_vec = ""
                wv_confid = "0"

            # look for explicit mentions of feature names
            class_mention, code_mention = self._feature_location_type_mention(
                ent)
            # build results-based features
            most_alt = self._feature_most_alternative(result)
            # TODO check if most_common feature really isn't that useful
            most_common = self._feature_most_common(result)
            most_pop = self._feature_most_population(result)
            first_back, second_back = self._feature_first_back(result)

            try:
                maj_vote = Counter([
                    word_vec, most_alt, first_back, most_pop, ct_mention
                ]).most_common()[0][0]
            except Exception as e:
                print("Problem taking majority vote: ", ent, e)
                maj_vote = ""

            if not maj_vote:
                maj_vote = ""
            # We only want all this junk for the labeling task. We just want to straight to features
            # and the model when in production.
            try:
                start = ent.start_char
                end = ent.end_char
                iso_label = maj_vote
                try:
                    text_label = self._inv_cts[iso_label]
                except KeyError:
                    text_label = ""
                task = {
                    "text": ent.text,
                    "label": text_label,  # human-readable country name
                    "word": ent.text,
                    # make sure to rename for Prodigy
                    "spans": [{
                        "start": start,
                        "end": end,
                    }],
                    "features": {
                        "maj_vote": iso_label,
                        "word_vec": word_vec,
                        "first_back": first_back,
                        "most_alt": most_alt,
                        "most_pop": most_pop,
                        "ct_mention": ct_mention,
                        "ctm_count1": ctm_count1,
                        "ct_mention2": ct_mention2,
                        "ctm_count2": ctm_count2,
                        "wv_confid": wv_confid,
                        # inferred geonames class from mentions
                        "class_mention": class_mention,
                        "code_mention": code_mention,
                    }
                }
                task_list.append(task)
            except Exception as e:
                print(ent.text)
                print(e)
        return task_list  # rename this var

Exemple #43

0

Afficher le fichier

class PandABlocksClient:
    # Sentinel that tells the send_loop and recv_loop to stop
    STOP = object()

    def __init__(self, hostname="localhost", port=8888, queue_cls=None):
        if queue_cls is None:
            try:
                # Python 2
                from Queue import Queue as queue_cls
            except ImportError:
                # Python 3
                from queue import Queue as queue_cls
        self.queue_cls = queue_cls
        self.hostname = hostname
        self.port = port
        # Completed lines for a response in progress
        self._completed_response_lines = []
        # True if the current response is multiline
        self._is_multiline = None
        # True when we have been started
        self.started = False
        # Filled in on start
        self._socket = None
        self._send_spawned = None
        self._send_queue = None
        self._recv_spawned = None
        self._response_queues = None
        self._thread_pool = None

    def start(self, spawn=None, socket_cls=None):
        if spawn is None:
            from multiprocessing.pool import ThreadPool

            self._thread_pool = ThreadPool(2)
            spawn = self._thread_pool.apply_async
        if socket_cls is None:
            from socket import socket as socket_cls
        assert not self.started, "Send and recv threads already started"
        # Holds (message, response_queue) to send next
        self._send_queue = self.queue_cls()
        # Holds response_queue to send next
        self._response_queues = self.queue_cls()
        self._socket = socket_cls()
        try:
            self._socket.connect((self.hostname, self.port))
        except OSError as e:
            raise ConnectionError(
                f"Can't connect to '{self.hostname}:{self.port}', "
                "did all services on the PandA start correctly?"
            ) from e

        self._send_spawned = spawn(self._send_loop)
        self._recv_spawned = spawn(self._recv_loop)
        self.started = True

    def stop(self):
        assert self.started, "Send and recv threads not started"
        self._send_queue.put((self.STOP, None))
        self._send_spawned.wait()
        import socket

        try:
            self._socket.shutdown(socket.SHUT_RD)
        except Exception:
            pass
        self._recv_spawned.wait()
        self._socket.close()
        self._socket = None
        self.started = False
        if self._thread_pool is not None:
            self._thread_pool.close()
            self._thread_pool.join()
            self._thread_pool = None

    def send(self, message):
        response_queue = self.queue_cls()
        self._send_queue.put((message, response_queue))
        return response_queue

    def recv(self, response_queue, timeout=10.0):
        response = response_queue.get(timeout=timeout)
        if isinstance(response, Exception):
            raise response
        else:
            return response

    def send_recv(self, message, timeout=10.0):
        """Send a message to a PandABox and wait for the response

        Args:
            message (str): The message to send
            timeout (float): How long to wait before raising queue.Empty

        Returns:
            str: The response
        """
        response_queue = self.send(message)
        response = self.recv(response_queue, timeout)
        return response

    def _send_loop(self):
        """Service self._send_queue, sending requests to server"""
        while True:
            message, response_queue = self._send_queue.get()
            if message is self.STOP:
                break
            try:
                self._response_queues.put(response_queue)
                self._socket.sendall(message.encode("utf-8"))
            except Exception:  # pylint:disable=broad-except
                log.exception("Exception sending message %s", message)

    def _get_lines(self):
        buf = ""
        while True:
            lines = buf.split("\n")
            for line in lines[:-1]:
                yield line
            buf = lines[-1]
            # Get something new from the socket
            rx = self._socket.recv(4096).decode("utf-8")
            if not rx:
                break
            buf += rx

    def _respond(self, resp):
        """Respond to the person waiting"""
        response_queue = self._response_queues.get(timeout=0.1)
        response_queue.put(resp)
        self._completed_response_lines = []
        self._is_multiline = None

    def _recv_loop(self):
        """Service socket recv, returning responses to the correct queue"""
        self._completed_response_lines = []
        self._is_multiline = None
        lines_iterator = self._get_lines()
        while True:
            try:
                line = next(lines_iterator)
                if self._is_multiline is None:
                    self._is_multiline = line.startswith("!") or line == "."
                if line.startswith("ERR"):
                    self._respond(ValueError(line))
                elif self._is_multiline:
                    if line == ".":
                        self._respond(self._completed_response_lines)
                    else:
                        assert (
                            line[0] == "!"
                        ), f"Multiline response {repr(line)} doesn't start with !"
                        self._completed_response_lines.append(line[1:])
                else:
                    self._respond(line)
            except StopIteration:
                return
            except Exception:
                log.exception("Exception receiving message")
                raise

    def _get_block_numbers(self):
        block_numbers = OrderedDict()
        for line in self.send_recv("*BLOCKS?\n"):
            block_name, number = line.split()
            block_numbers[block_name] = int(number)
        return block_numbers

    def parameterized_send(self, request, parameter_list):
        """Send batched requests for a list of parameters

        Args:
            request (str): Request to send, like "%s.*?\n"
            parameter_list (list): parameters to format with, like
                ["TTLIN", "TTLOUT"]

        Returns:
            dict: {parameter: response_queue}
        """
        response_queues = OrderedDict()
        for parameter in parameter_list:
            response_queues[parameter] = self.send(request % parameter)
        return response_queues

    def get_blocks_data(self):
        """Query the server for every block and the fields it contains.

        Requests are queued in batches with parameterized_send() and the
        replies are collected afterwards, so several requests are outstanding
        at once.

        Returns:
            OrderedDict: {block_name: BlockData(number, description, fields)}
                in sorted block-name order. fields is an OrderedDict of
                {field_name: FieldData(field_type, field_subtype, description,
                labels)} ordered by the index the server reported per field
        """
        blocks = OrderedDict()

        # Get details about number of blocks
        block_numbers = self._get_block_numbers()
        block_names = list(block_numbers)

        # Queue up info about each block
        desc_queues = self.parameterized_send("*DESC.%s?\n", block_names)
        field_queues = self.parameterized_send("%s.*?\n", block_names)

        # Create BlockData for each block
        # TODO: we sort here while server gives these in hash table order
        for block_name in sorted(block_names):
            number = block_numbers[block_name]
            description = strip_ok(self.recv(desc_queues[block_name]))
            # fields is filled in further down; BlockData keeps a reference
            fields = OrderedDict()
            blocks[block_name] = BlockData(number, description, fields)

            # Parse the field list
            unsorted_fields = {}
            for line in self.recv(field_queues[block_name]):
                split = line.split()
                assert len(split) in (
                    3,
                    4,
                ), f"Expected field_data to have len 3 or 4, got {len(split)}"
                # A three-token line has no subtype; use "" in its place
                if len(split) == 3:
                    split.append("")
                field_name, index, field_type, field_subtype = split
                unsorted_fields[field_name] = (int(index), field_type, field_subtype)

            # Sort the field list
            def get_field_index(field_name):
                return unsorted_fields[field_name][0]

            field_names = sorted(unsorted_fields, key=get_field_index)

            # Request description for each field
            # ("%%s" survives the first formatting as "%s" for the field name)
            field_desc_queues = self.parameterized_send(
                "*DESC.%s.%%s?\n" % block_name, field_names
            )

            # Request enum labels for fields that are enums
            enum_fields = []
            for field_name in field_names:
                _, field_type, field_subtype = unsorted_fields[field_name]
                if field_type in ("bit_mux", "pos_mux") or field_subtype == "enum":
                    enum_fields.append(field_name)
                elif field_type == "ext_out":
                    # For ext_out the labels are requested for <field>.CAPTURE
                    enum_fields.append(field_name + ".CAPTURE")
            enum_queues = self.parameterized_send(
                "*ENUMS.%s.%%s?\n" % block_name, enum_fields
            )

            # Get desc and enum data for each field
            for field_name in field_names:
                _, field_type, field_subtype = unsorted_fields[field_name]
                if field_name in enum_queues:
                    labels = self.recv(enum_queues[field_name])
                elif field_name + ".CAPTURE" in enum_queues:
                    labels = self.recv(enum_queues[field_name + ".CAPTURE"])
                else:
                    # No labels were requested for this field
                    labels = []
                description = strip_ok(self.recv(field_desc_queues[field_name]))
                fields[field_name] = FieldData(
                    field_type, field_subtype, description, labels
                )

        return blocks

    def get_pcap_bits_fields(self):
        # {field_to_set: [bit_names]}
        # E.g. {"PCAP.BITS0"=["TTLIN1.VAL", "TTLIN2.VAL", ...], ...}
        bits_fields = []
        for line in self.send_recv("PCAP.*?\n"):
            split = line.split()
            if len(split) == 4:
                field_name, _, field_type, field_subtype = split
                if field_type == "ext_out" and field_subtype == "bits":
                    bits_fields.append(f"PCAP.{field_name}")
        bits_queues = self.parameterized_send("%s.BITS?\n", sorted(bits_fields))
        bits = OrderedDict()
        for k, queue in bits_queues.items():
            bits[k + ".CAPTURE"] = self.recv(queue)
        return bits

    def get_changes(self, include_errors=False):
        table_queues = {}
        for line in self.send_recv("*CHANGES?\n"):
            if "=" in line:
                field, val = line.split("=", 1)
            elif line[-1] == "<":
                # table
                field = line[:-1]
                val = None
                table_queues[field] = self.send(f"{field}?\n")
            elif line.endswith("(error)"):
                if include_errors:
                    field = line.split(" ", 1)[0]
                    val = Exception
                else:
                    continue
            else:
                log.warning("Can't parse line %r of changes", line)
                continue
            yield field, val
        for field, q in table_queues.items():
            yield field, self.recv(q)

    def get_table_fields(self, block, field):
        """Describe the columns of the table at block.field.

        Args:
            block (str): Block name
            field (str): Name of the table field within the block

        Returns:
            OrderedDict: {name: TableFieldData(bits_hi, bits_lo, description,
                labels, signed)} in the order of the "FIELDS?" response.
                labels is None unless the column's type token is "enum";
                signed is True only when the type token is "int"
        """
        layout = OrderedDict()  # name -> ("hi:lo" string, signed flag)
        enum_queues = {}
        for line in self.send_recv(f"{block}.{field}.FIELDS?\n"):
            parts = line.split()
            name = parts[1].strip()
            # The third token, when present, gives the column type
            kind = parts[2] if len(parts) > 2 else None
            if kind == "enum":
                # Field is an enum, get its values
                enum_queues[name] = self.send(f"*ENUMS.{block}.{field}[].{name}?\n")
            layout[name] = (parts[0], kind == "int")

        # Request description for each field
        desc_queues = self.parameterized_send(
            "*DESC.%s.%s[].%%s?\n" % (block, field), list(layout)
        )
        for name, (bits_str, signed) in layout.items():
            bits_hi, bits_lo = [int(x) for x in bits_str.split(":")]
            description = strip_ok(self.recv(desc_queues[name]))
            labels = self.recv(enum_queues[name]) if name in enum_queues else None
            # Replacing the value of an existing key is safe while iterating
            layout[name] = TableFieldData(bits_hi, bits_lo, description, labels, signed)
        return layout

    def get_field(self, block, field):
        try:
            resp = self.send_recv(f"{block}.{field}?\n")
        except ValueError as e:
            raise ValueError(f"Error getting {block}.{field}: {e}")
        else:
            return strip_ok(resp)

    def set_field(self, block, field, value):
        self.set_fields({f"{block}.{field}": value})

    def set_fields(self, field_values):
        queues = OrderedDict()
        for field, value in field_values.items():
            message = f"{field}={value}\n"
            queues[(field, value)] = self.send(message)
        for (field, value), queue in queues.items():
            try:
                resp = self.recv(queue)
            except ValueError as e:
                raise ValueError(f"Error setting {field} to {value!r}: {e}")
            else:
                assert resp == "OK", f"Expected OK, got {resp!r}"

    def set_table(self, block, field, int_values):
        lines = [f"{block}.{field}<\n"]
        lines += [f"{int_value}\n" for int_value in int_values]
        lines += ["\n"]
        resp = self.send_recv("".join(lines))
        assert resp == "OK", f"Expected OK, got {resp!r}"

Exemple #44

0

Afficher le fichier

Fichier : pywren-pocket-local.py Projet : charles-typ/shuffle-pocket

    def run_command(key):
        """Simulate one task: sleep for the processing phase, then run a
        time-bounded write loop in a thread pool.

        Nothing is actually stored: the write loop only builds key names and
        logs "[POCKET]" start/end lines (the real storage call is disabled).

        Args:
            key (dict): Task description with the entries
                'taskId': identifier used in key names and log lines
                'job_number': job id, converted with int()
                'write_element_size': size in bytes of each write body
                'process_time': seconds to sleep before writing
                'total_time': seconds the write loop keeps running

        Returns:
            tuple: (begin_of_function, end_of_function, read_time, work_time,
                write_time). The first two are time.time() stamps; read_time
                and work_time are always 0; write_time is the seconds spent
                waiting for the write loop
        """
        begin_of_function = time.time()
        logger = logging.getLogger(__name__)
        logger.info("taskId = " + str(key['taskId']))
        taskId = key['taskId']
        jobid_int = int(key['job_number'])
        write_element_size = int(key['write_element_size'])
        process_time = int(key['process_time'])
        total_time = int(key['total_time'])

        # Nothing is read or computed here, so these are reported as zero
        read_time = work_time = write_time = 0

        # One writer thread per client
        number_of_clients = 1
        write_pool = ThreadPool(number_of_clients)
        try:
            time.sleep(process_time)

            logger.info("Process finish here: " + str(time.time()))

            def write_work_client(writer_key):
                """Log simulated writes until total_time seconds have passed."""
                start_time = time.time()
                # Validated but otherwise unused
                int(writer_key['client_id'])
                taskID = writer_key['taskId']
                jobID = writer_key['jobid']
                datasize = writer_key['write_element_size']
                total_time = writer_key['total_time']
                logging.info(total_time)
                body = b'a' * datasize
                count = 0
                while time.time() < start_time + total_time:
                    count = count + 1
                    keyname = str(taskID) + "-" + str(count)
                    m = hashlib.md5()
                    m.update(keyname.encode('utf-8'))
                    # Key the disabled storage call would write to; still
                    # computed so the per-iteration work stays the same
                    randomized_keyname = str(taskID) + '-' + m.hexdigest()[:8] + '-' + str(count)
                    logger.info("[POCKET] [" + str(jobID) + "] " + str(time.time_ns()) + " " + str(taskID) + " " + str(len(body)) + " write " + "S")
                    # (storage write of body under randomized_keyname is disabled)
                    logger.info("[POCKET] [" + str(jobID) + "] " + str(time.time_ns()) + " " + str(taskID) + " " + str(len(body)) + " write " + "E ")

                logger.info("Write finish here: " + str(time.time()))

            writer_keylist = [
                {
                    'client_id': i,
                    'taskId': taskId,
                    'jobid': jobid_int,
                    'write_element_size': write_element_size,
                    'total_time': total_time,
                }
                for i in range(number_of_clients)
            ]

            write_pool_handler = write_pool.map_async(write_work_client, writer_keylist)
            start_time = time.time()
            # NOTE: wait() does not re-raise exceptions from the workers
            write_pool_handler.wait()
            write_time = time.time() - start_time
        finally:
            # Always release the worker threads, even if a step above raised
            write_pool.close()
            write_pool.join()
        end_of_function = time.time()
        return begin_of_function, end_of_function, read_time, work_time, write_time

Exemple #45

0

Afficher le fichier

class ApiClient(object):
    """
    Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to
        the API.
    :param cookie: a cookie to include in the header when making calls
        to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if PY3 else long,
        'float': float,
        'str': str,
        'bool': bool,
        'date': date,
        'datetime': datetime,
        'object': object,
    }

    def __init__(self,
                 configuration=None,
                 header_name=None,
                 header_value=None,
                 cookie=None):
        """Set up the configuration, thread pool, REST client and headers.

        :param configuration: Configuration object for this client; a default
            Configuration() is created when None.
        :param header_name: name of a default header to register, if any.
        :param header_value: value stored for header_name.
        :param cookie: cookie value kept on the client.
        """
        self.configuration = (Configuration()
                              if configuration is None else configuration)

        # Pool used by call_api() for asynchronous requests
        self.pool = ThreadPool()
        self.rest_client = RESTClientObject(self.configuration)

        self.default_headers = ({} if header_name is None
                                else {header_name: header_value})
        self.cookie = cookie
        # Set default User-Agent (the property setter writes it into
        # default_headers, so that dict must already exist).
        self.user_agent = 'Swagger-Codegen/1.0.0/python'
        ########### Change
        # Store last api call metadata
        self.last_metadata = {}
        ########### End Change

    def __del__(self):
        """Close the thread pool and wait for its workers to finish."""
        # __del__ also runs for an object whose __init__ raised before
        # self.pool was assigned, so the attribute may be missing
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.close()
            pool.join()

    @property
    def user_agent(self):
        """The 'User-Agent' entry of the default headers."""
        headers = self.default_headers
        return headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        # Stored in default_headers rather than as a separate attribute
        headers = self.default_headers
        headers['User-Agent'] = value

    def set_default_header(self, header_name, header_value):
        """Add or replace one entry in the default headers.

        :param header_name: name of the header.
        :param header_value: value stored for that header.
        """
        self.default_headers.update({header_name: header_value})

    ########### Change
    def metadata_wrapper(fn):
        """Decorator that records metadata about the most recent API call.

        Before each call ``self.last_metadata`` is replaced by a new dict
        holding "url" (configuration host + first positional argument),
        "method" (second positional argument) and "timestamp". If the wrapped
        call raises, the exception is stored under "exception" and re-raised.
        """
        @functools.wraps(fn)
        def wrapped_f(self, *args, **kwargs):
            record = self.last_metadata = {}
            record["url"] = self.configuration.host + args[0]
            record["method"] = args[1]
            record["timestamp"] = time.time()
            try:
                result = fn(self, *args, **kwargs)
            except Exception as e:
                self.last_metadata["exception"] = e
                raise
            return result

        return wrapped_f

    def get_last_metadata(self):
        """Return the metadata dict recorded for the most recent API call."""
        metadata = self.last_metadata
        return metadata

    ########### End Change

    @metadata_wrapper
    def __call_api(self,
                   resource_path,
                   method,
                   path_params=None,
                   query_params=None,
                   header_params=None,
                   body=None,
                   post_params=None,
                   files=None,
                   response_type=None,
                   auth_settings=None,
                   _return_http_data_only=None,
                   collection_formats=None,
                   _preload_content=True,
                   _request_timeout=None):
        """Build and perform one HTTP request, then post-process the response.

        Header, path, query and post parameters are sanitized and converted
        with parameters_to_tuples(); path parameters are substituted into
        resource_path; auth settings are applied; the request is sent with
        request(). The raw response is kept in self.last_response, and both
        it and the returned data are recorded in self.last_metadata.

        The parameters have the same meaning as in call_api().

        :return: return_data alone when _return_http_data_only is true,
            otherwise (return_data, status, headers). return_data is the
            deserialized response when _preload_content and response_type are
            set, None when only _preload_content is set, and the raw response
            when _preload_content is false.
        """
        config = self.configuration

        # header parameters
        # Copy first so the caller's dict is never modified in place
        header_params = dict(header_params or {})
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param))

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method,
                                     url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params,
                                     body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        ########### Change
        self.last_metadata["response"] = response_data
        self.last_metadata["return_data"] = return_data
        ########### End Change

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """
        Convert obj into plain data suitable for a JSON request.

        None is returned as None.
        Instances of PRIMITIVE_TYPES are returned unchanged.
        Lists and tuples are sanitized element by element.
        datetime.datetime and datetime.date become ISO 8601 strings.
        Dicts have each value sanitized.
        Anything else is treated as a swagger model: its non-None attributes
        are collected under their JSON keys and then sanitized like a dict.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        if isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        if isinstance(obj, list):
            return [self.sanitize_for_serialization(item) for item in obj]
        if isinstance(obj, tuple):
            return tuple(self.sanitize_for_serialization(item) for item in obj)
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            source = obj
        else:
            # Model object: map each attribute listed in `swagger_types` to
            # its JSON key from `attribute_map`, leaving out None values.
            source = {}
            for attr, _ in iteritems(obj.swagger_types):
                value = getattr(obj, attr)
                if value is not None:
                    source[obj.attribute_map[attr]] = value

        return dict(
            (key, self.sanitize_for_serialization(val))
            for key, val in iteritems(source))

    def deserialize(self, response, response_type):
        """
        Turn a response into an object of the requested type.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        # File downloads are handled separately from JSON payloads
        if response_type == "file":
            return self.__deserialize_file(response)

        # Parse the body as JSON; if that fails use the body as it is
        try:
            payload = json.loads(response.data)
        except ValueError:
            payload = response.data

        return self.__deserialize(payload, response_type)

    def __deserialize(self, data, klass):
        """
        Deserializes dict, list, str into an object.

        A string klass is resolved first: "list[X]" and "dict(K, V)" recurse
        over the elements/values, names in NATIVE_TYPES_MAPPING map to their
        native type, and any other name is looked up on the models module.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if isinstance(klass, str):
            if klass.startswith('list['):
                # Raw strings: "\[" is not a valid string escape
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                # Only the value type (second group) is used
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == date:
            return self.__deserialize_date(data)
        elif klass == datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self,
                 resource_path,
                 method,
                 path_params=None,
                 query_params=None,
                 header_params=None,
                 body=None,
                 post_params=None,
                 files=None,
                 response_type=None,
                 auth_settings=None,
                 asynchronous=None,
                 _return_http_data_only=None,
                 collection_formats=None,
                 _preload_content=True,
                 _request_timeout=None):
        """
        Make the HTTP request, either directly or on the client's thread pool.

        :param resource_path: Path to method endpoint.
        :param method: Method to call.
        :param path_params: Path parameters in the url.
        :param query_params: Query parameters in the url.
        :param header_params: Header parameters to be
            placed in the request header.
        :param body: Request body.
        :param post_params dict: Request post form parameters,
            for `application/x-www-form-urlencoded`, `multipart/form-data`.
        :param files dict: key -> filename, value -> filepath,
            for `multipart/form-data`.
        :param response_type: Response data type.
        :param auth_settings list: Auth Settings names for the request.
        :param asynchronous bool: execute request asynchronously
        :param _return_http_data_only: response data without head status code
            and headers
        :param collection_formats: dict of collection formats for path, query,
            header, and post parameters.
        :param _preload_content: if False, the urllib3.HTTPResponse object
            will be returned without reading/decoding response data.
            Default is True.
        :param _request_timeout: timeout setting for this request. If one
            number provided, it will be total request timeout. It can also be
            a pair (tuple) of (connection, read) timeouts.
        :return:
            If asynchronous is true, the result handle returned by the
            pool's apply_async() for the queued request.
            Otherwise the response, returned directly.
        """
        # Both paths pass the same positional arguments to __call_api
        call_args = (resource_path, method, path_params, query_params,
                     header_params, body, post_params, files, response_type,
                     auth_settings, _return_http_data_only,
                     collection_formats, _preload_content, _request_timeout)
        if asynchronous:
            return self.pool.apply_async(self.__call_api, call_args)
        return self.__call_api(*call_args)

    def request(self, method, url, query_params=None, headers=None,
                post_params=None, body=None, _preload_content=True,
                _request_timeout=None):
        """
        Dispatches an HTTP request to the matching RESTClient verb method.

        :param method: Upper-case HTTP verb (`GET`, `HEAD`, `OPTIONS`,
            `POST`, `PUT`, `PATCH` or `DELETE`).
        :param url: Target URL, forwarded positionally.
        :param query_params: Forwarded for every verb.
        :param headers: Forwarded for every verb.
        :param post_params: Forwarded for OPTIONS/POST/PUT/PATCH only.
        :param body: Forwarded for every verb except GET and HEAD.
        :param _preload_content: Forwarded for every verb.
        :param _request_timeout: Forwarded for every verb.
        :return: Whatever the RESTClient verb method returns.
        :raises ValueError: if `method` is not one of the verbs above.
        """
        # Keyword arguments that every verb receives.
        call_kwargs = {
            'query_params': query_params,
            'headers': headers,
            '_preload_content': _preload_content,
            '_request_timeout': _request_timeout,
        }

        if method in ("GET", "HEAD"):
            # No request payload is forwarded for these verbs.
            pass
        elif method in ("OPTIONS", "POST", "PUT", "PATCH"):
            call_kwargs['post_params'] = post_params
            call_kwargs['body'] = body
        elif method == "DELETE":
            # DELETE carries a body but never form parameters.
            call_kwargs['body'] = body
        else:
            raise ValueError("http method must be `GET`, `HEAD`, `OPTIONS`,"
                             " `POST`, `PATCH`, `PUT` or `DELETE`.")

        # The RESTClient method is named exactly like the verb.
        verb_method = getattr(self.rest_client, method)
        return verb_method(url, **call_kwargs)

    def parameters_to_tuples(self, params, collection_formats):
        """
        Flattens parameters into a list of `(name, value)` tuples.

        Parameters whose name appears in `collection_formats` are treated as
        collections: `multi` emits one tuple per element, any other format
        joins the stringified elements with that format's delimiter.

        :param params: Parameters as dict or list of two-tuples
        :param dict collection_formats: Parameter collection formats
        :return: Parameters as list of tuples, collections formatted
        """
        # Delimiters of the non-default joined formats; csv (',') is the
        # fallback for anything not listed here.
        delimiters = {'ssv': ' ', 'tsv': '\t', 'pipes': '|'}
        formats = {} if collection_formats is None else collection_formats
        pairs = iteritems(params) if isinstance(params, dict) else params

        flattened = []
        for name, value in pairs:
            if name not in formats:
                # Plain parameter: passed through untouched.
                flattened.append((name, value))
                continue

            fmt = formats[name]
            if fmt == 'multi':
                flattened.extend((name, item) for item in value)
            else:
                separator = delimiters.get(fmt, ',')
                joined = separator.join(str(item) for item in value)
                flattened.append((name, joined))
        return flattened

    ########### Change

    def prepare_post_parameters(self, post_params=None, files=None):
        """
        Builds form parameters.

        Each file entry may be an open file-like object (anything with a
        callable `read`) or a filesystem path. File-like objects that expose
        no usable `name` (e.g. `io.BytesIO`) are uploaded under the form
        field name.

        :param post_params: Normal form parameters. A list/tuple passed here
            is copied, never modified.
        :param files: File parameters, mapping a form field name to one
            file/path or a list of them.
        :return: Form parameters with files, as
            `(key, (filename, data, mimetype))` entries appended after the
            normal parameters.
        """
        # Work on a copy: appending to the caller's own list would leak the
        # file entries back into the caller's data.
        if isinstance(post_params, (list, tuple)):
            params = list(post_params)
        else:
            params = post_params or []

        for key, values in (files or {}).items():
            entries = values if isinstance(values, list) else [values]
            for entry in entries:
                reader = getattr(entry, 'read', None)
                if callable(reader):
                    # Open file-like object: read it directly.
                    data = reader()
                    # In-memory streams have no name; fall back to the
                    # field name so the upload still gets a filename.
                    filename = getattr(entry, 'name', None) or key
                else:
                    # Otherwise it is presumably a file path.
                    with open(entry, 'rb') as fh:
                        data = fh.read()
                    filename = entry
                basepath = os.path.basename(filename)
                mimetype = mimetypes.guess_type(
                    basepath)[0] or 'application/octet-stream'
                params.append((key, (basepath, data, mimetype)))
        return params

    ########### End Change

    def select_header_accept(self, accepts):
        """
        Picks the `Accept` header value from the provided media types.

        JSON wins whenever it is offered; otherwise every (lower-cased)
        media type is listed, comma separated.

        :param accepts: List of headers.
        :return: Accept (e.g. application/json), or None when `accepts`
            is empty or missing.
        """
        if not accepts:
            return None

        json_type = 'application/json'
        lowered = [media_type.lower() for media_type in accepts]
        return json_type if json_type in lowered else ', '.join(lowered)

    def select_header_content_type(self, content_types):
        """
        Picks the `Content-Type` header value from the provided types.

        JSON is used when nothing is provided, or when either JSON or the
        wildcard `*/*` is offered; otherwise the first (lower-cased) entry
        is used.

        :param content_types: List of content-types.
        :return: Content-Type (e.g. application/json).
        """
        json_type = 'application/json'
        if not content_types:
            return json_type

        lowered = [content_type.lower() for content_type in content_types]
        if any(candidate in lowered for candidate in (json_type, '*/*')):
            return json_type
        return lowered[0]

    def update_params_for_auth(self, headers, querys, auth_settings):
        """
        Injects credentials into the header dict and/or query tuple list.

        Identifiers that are unknown to the configuration, or whose setting
        has an empty value, are skipped silently.

        :param headers: Header parameters dict to be updated.
        :param querys: Query parameters tuple list to be updated.
        :param auth_settings: Authentication setting identifiers list.
        :raises ValueError: if a setting's `in` is neither `header` nor
            `query`.
        """
        for identifier in auth_settings or ():
            setting = self.configuration.auth_settings().get(identifier)
            if not setting or not setting['value']:
                continue

            location = setting['in']
            if location == 'header':
                headers[setting['key']] = setting['value']
            elif location == 'query':
                querys.append((setting['key'], setting['value']))
            else:
                raise ValueError(
                    'Authentication token must be in `query` or `header`')

    def __deserialize_file(self, response):
        """
        Saves response body into a file in a temporary folder,
        using the filename from the `Content-Disposition` header if provided.

        Only the final path component of the header's filename is used, so
        the file always lands inside the temporary folder. When the header
        is absent or carries no usable filename, the generated temporary
        name is kept.

        :param response:  RESTResponse.
        :return: file path.
        """
        # mkstemp only reserves a unique name inside the configured folder;
        # the placeholder file is removed and the path is reused below.
        fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path)
        os.close(fd)
        os.remove(path)

        content_disposition = response.getheader("Content-Disposition")
        if content_disposition:
            match = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?',
                              content_disposition)
            # The header is controlled by the server: strip any directory
            # part so a value like "../../x" cannot escape the temp folder.
            filename = os.path.basename(match.group(1)) if match else ''
            if filename and filename not in ('.', '..'):
                path = os.path.join(os.path.dirname(path), filename)

        data = response.data
        # A bytes body must be written in binary mode; text mode would raise
        # TypeError on Python 3.
        mode = "wb" if isinstance(data, bytes) else "w"
        with open(path, mode) as f:
            f.write(data)

        return path

    def __deserialize_primitive(self, data, klass):
        """
        Deserializes string to primitive type.

        :param data: str.
        :param klass: class literal.

        :return: int, long, float, str, bool. When `klass(data)` raises
            TypeError the data is returned unchanged; when it raises
            UnicodeEncodeError the data is returned as text instead.
        """
        try:
            return klass(data)
        except UnicodeEncodeError:
            # `unicode` only exists on Python 2; on Python 3 `str` is the
            # text type. Resolving it lazily keeps this fallback from
            # raising NameError on Python 3.
            try:
                text_type = unicode
            except NameError:
                text_type = str
            return text_type(data)
        except TypeError:
            return data

    def __deserialize_object(self, value):
        """
        Returns the given value unchanged.

        :param value: the value to pass through.
        :return: object.
        """
        return value

    def __deserialize_date(self, string):
        """
        Converts a date string into a `date` object.

        Parsing relies on `dateutil`; when that package cannot be imported
        the input string is handed back unchanged.

        :param string: str.
        :return: date.
        :raises ApiException: if the string cannot be parsed.
        """
        try:
            from dateutil.parser import parse as parse_datetime
        except ImportError:
            # No parser available: hand the raw string back.
            return string

        try:
            parsed = parse_datetime(string)
            return parsed.date()
        except ValueError:
            message = "Failed to parse `{0}` into a date object".format(string)
            raise ApiException(status=0, reason=message)

    def __deserialize_datatime(self, string):
        """
        Converts a datetime string into a `datetime` object.

        The string should be in iso8601 datetime format. Parsing relies on
        `dateutil`; when that package cannot be imported the input string
        is handed back unchanged.

        :param string: str.
        :return: datetime.
        :raises ApiException: if the string cannot be parsed.
        """
        try:
            from dateutil.parser import parse as parse_datetime
        except ImportError:
            # No parser available: hand the raw string back.
            return string

        try:
            return parse_datetime(string)
        except ValueError:
            message = "Failed to parse `{0}` into a datetime object".format(
                string)
            raise ApiException(status=0, reason=message)

    def __deserialize_model(self, data, klass):
        """
        Builds a model instance from decoded list/dict data.

        Each attribute declared in `klass.swagger_types` whose mapped key
        (`klass.attribute_map`) is present in `data` is deserialized and
        passed to the model constructor. Models exposing
        `get_real_child_model` may redirect to another class name, in which
        case `data` is deserialized again as that class.

        :param data: dict, list.
        :param klass: class literal.
        :return: model object, or `data` itself when the class declares no
            swagger types and no `get_real_child_model`.
        """
        declared_types = klass.swagger_types
        if not declared_types and not hasattr(klass, 'get_real_child_model'):
            return data

        init_kwargs = {}
        if klass.swagger_types is not None:
            for attr, attr_type in iteritems(klass.swagger_types):
                if data is None:
                    continue
                json_key = klass.attribute_map[attr]
                if json_key in data and isinstance(data, (list, dict)):
                    init_kwargs[attr] = self.__deserialize(
                        data[json_key], attr_type)

        instance = klass(**init_kwargs)

        if not hasattr(instance, 'get_real_child_model'):
            return instance

        # Polymorphic model: let the instance name the concrete class.
        klass_name = instance.get_real_child_model(data)
        if klass_name:
            instance = self.__deserialize(data, klass_name)
        return instance

Exemple #46

0

Afficher le fichier

def read_async(iterable,
               urlkey=None,
               max_workers=None,
               blocksize=1024 * 1024,
               decode=None,
               raise_http_err=True,
               timeout=None,
               unordered=True,
               openers=None,
               **kwargs):  # pylint:disable=too-many-arguments
    """
    Wrapper around `multiprocessing.pool.ThreadPool()` for downloading
    data from urls in `iterable` asynchronously with remarkable performance boost for large
    downloads. Each download is executed on a separate *worker thread*, yielding the result of
    each `url` read.

    Yields the tuple:
    ```
        obj, result, exc, url
    ```
    where:

      - `obj` is the element of `iterable` which originated the `urlread` call
      - `result` is the result of `urlread`, it is None in case of errors (see `exc` below).
        Otherwise, it is the tuple
        ```(data, status_code, message)```
         where:
         * `data` is the data read (as bytes or string if `decode != None`). It can be None
            when `raise_http_err=False` and an HTTPException occurred
         * `status_code` is the integer denoting the status code (e.g. 200), and
         * `message` the string denoting the status message (e.g., 'OK').
      - `exc` is the exception raised by `urlread`, if any. **Either `result` or `exc` are None,
        but not both**. Note that `exc` is one of the following URL-related exceptions:
        ```urllib2.URLError, httplib.HTTPException, socket.error```
        Any other exception is raised and will stop the download
      - `url` is the original url (either string or Request object). If `iterable` is an
        iterable of `Request` objects or url strings, then `url` is equal to `obj`

    Note that if `raise_http_err=False` then `HTTPError`s are treated as 'normal'
    response and will be yielded in `result` as a tuple where `data=None` and `status_code`
    is most likely greater or equal to 400.
    Finally, this function can cleanly cancel yet-to-be-processed tasks via Ctrl+C
    if executed from the command line. In the following we will simply refer to `urlread`
    to indicate the `urllib2.urlopen.read` function.

    :param iterable: an iterable of objects representing the urls addresses to be read: if its
        elements are neither strings nor `Request` objects, the `urlkey` argument (see below)
        must be specified to map each element to a valid url string or Request
    :param urlkey: function or None. When None (the default), all elements of `iterable` must be
        url strings or Request objects. When function, it will be called with each element of
        `iterable` as argument, and must return the mapped url address or Request.
    :param max_workers: integer or None (the default) denoting the number of worker threads
        of the `ThreadPool`. When None, the pool size defaults to the machine's cpu count
    :param blocksize: integer defaulting to 1024*1024 specifying, when connecting to one of
        the given urls, the maximum number of bytes to be read at each call of `urlopen.read`.
        If the size argument is negative or omitted, read all data until EOF is reached
    :param decode: string or None (default: None) optional argument specifying if the content
        of the url must be decoded. None means: return the byte string as it was read.
        Otherwise, use this argument for string content (not bytes) by supplying a decoding,
        such as e.g. 'utf8'
    :param raise_http_err: boolean (True by default) tells whether `HTTPError`s should
        be yielded as exceptions or not. When False, `HTTPError`s are yielded as normal
        responses in `result` as the tuple `(None, status_code, message)`  (where `status_code`
        is most likely greater or equal to 400)
    :param timeout: timeout parameter specifies a timeout in seconds for blocking operations
        like the connection attempt (if not specified, None or non-positive, the global default
        timeout setting will be used). This actually only works for HTTP, HTTPS and FTP
        connections.
    :param unordered: boolean (default True): when True, results are yielded as soon as they
        are available (`ThreadPool.imap_unordered`) and are not guaranteed to follow the order
        of `iterable`. When False, results are yielded in the same order they are input in
        `iterable` (`ThreadPool.imap`). Theoretically True might execute faster, although
        tests did not show any remarkable difference
    :param openers: a function behaving like `urlkey`, should return a specific opener
        for the given item of iterable. When None, the default urllib opener is used
        See :func:`get_opener` for, e.g., creating an opener from a base url, user and password
    :param kwargs: optional arguments to be passed to the underlying python `urlopen` function.
        These arguments are ignored if a custom `openers` function is provided

    Notes:
    ======

    ThreadPool vs ThreadPoolExecutor
    --------------------------------

    This function changed from using `concurrent.futures.ThreadPoolExecutor` into
    the "old" `multiprocessing.pool.ThreadPool`: the latter consumes in most cases
    less memory (about 30% less), especially if `iterable` is not a list in memory but
    a python iterable (`concurrent.futures.ThreadPoolExecutor` builds a `set` of
    `Future`s object from `iterable`, whereas `multiprocessing.pool.ThreadPool` seems just
    to execute each element in iterable)

    killing threads / handling exceptions
    -------------------------------------

    This function handles any kind of unexpected exception (particularly relevant in case of
    e.g., `KeyboardInterrupt`) by setting an internal boolean flag before re-raising: every
    task which has not started yet checks the flag and returns immediately, so that the
    remaining work finishes as soon as possible. The pool is closed in every case: normal
    exhaustion, an exception, or the generator being closed before it is exhausted
    """
    # flag for CTRL-C or cancelled tasks
    kill = False

    # function executed on the worker threads: checks the flag above before
    # starting a download
    def urlwrapper(obj):
        if kill:
            return None
        url = urlkey(obj) if urlkey is not None else obj
        opener = openers(obj) if openers is not None else None
        try:
            return obj, \
                urlread(url, blocksize, decode, True, raise_http_err, timeout, opener,
                        **kwargs), \
                None, url
        except URLException as urlexc:
            return obj, None, urlexc.exc, url

    tpool = ThreadPool(max_workers)
    threadpoolmap = tpool.imap_unordered if unordered else tpool.imap  # (func, iterable, chunksize)
    # note above: chunksize argument for threads (not processes)
    # seems to slow down download. Omit the argument and leave chunksize=1 (default)
    try:
        # this try is for the keyboard interrupt (or any other exception) raised
        # while iterating over the results below
        for result_tuple in threadpoolmap(urlwrapper, iterable):
            if kill:
                continue  # (for safety: we should never enter here)
            yield result_tuple
    except:  # noqa: E722  (deliberately bare, see below)
        # According to this post:
        # http://stackoverflow.com/questions/29177490/how-do-you-kill-futures-once-they-have-started,
        # after a KeyboardInterrupt this method does not return until all
        # working threads have finished. Thus, we implement the `kill` flag
        # which makes them exit immediately, and hopefully this function will return within
        # seconds at most. We catch  a bare except cause we want the same to apply to all
        # other exceptions which we might raise (see few line above), including
        # KeyboardInterrupt and the GeneratorExit of an abandoned generator
        kill = True
        raise
    finally:
        # Release the pool on every exit path, not only on normal exhaustion:
        # otherwise its threads stay alive after an error or an early stop
        tpool.close()

Exemple #47

0

Afficher le fichier

def new_order(session, w_id, d_id, c_id, num_items, item_number,
              supplier_warehouse, quantity):
    """
    Create an order with `num_items` order lines and return its summary.

    The order lines are processed concurrently on a pool of 8 threads; the
    order row itself is inserted only after every order line was written.

    :param session: database session handed to the `utils` query helpers;
        it must also provide `prepare()` (presumably a Cassandra driver
        session -- confirm against the caller).
    :param w_id: warehouse id of the order.
    :param d_id: district id of the order. It is also concatenated into the
        `S_DIST_<d_id>` column name, so it must come from a trusted source.
    :param c_id: customer id placing the order.
    :param num_items: number of order lines; the three sequences below are
        indexed from 0 to `num_items - 1`.
    :param item_number: item id of each order line.
    :param supplier_warehouse: supplying warehouse id of each order line.
    :param quantity: ordered quantity of each order line.
    :return: dict with the keys `w_id`, `d_id`, `c_id`, `w_tax`, `d_tax`,
        `o_id`, `o_entry_d`, `num_items`, `total_amount`, `item_infos`
        (one tuple `(item id, item name, supplier warehouse, quantity,
        amount, adjusted stock quantity)` per line) and, when the customer
        row exists, `c_last`, `c_credit`, `c_discount`.
    """
    # Step 1: the order id is the district offset plus the district counter
    n = utils.single_select(
        session,
        'SELECT D_O_ID_OFST from district WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))
    n += utils.single_select(
        session,
        'SELECT D_O_COUNTER from district_counters WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))

    # Step 2
    utils.do_query(
        session,
        'UPDATE district_counters SET D_O_COUNTER = D_O_COUNTER + 1 WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))

    # Step 3: the order is "all local" only if no line has a remote supplier
    has_remote_item = any(supplier_warehouse[i] != w_id
                          for i in range(num_items))
    all_local = 0 if has_remote_item else 1
    current_datetime = datetime.now()

    # Step 4 & 5
    item_amount = [0] * num_items
    adjusted_qty = [0] * num_items
    cql_insert_item_orders = session.prepare(
        "INSERT INTO item_orders (W_ID, I_ID, O_ID, D_ID, C_ID) VALUES (?, ?, ?, ?, ?)"
    )

    def handle_item(i):
        # Runs on a worker thread. It only writes to its own slot `i` of the
        # shared lists, so no two threads ever touch the same element.
        # Step 5a
        s_quantity = utils.single_select(
            session,
            'SELECT S_QUANTITY FROM stock WHERE S_W_ID = %s AND S_I_ID = %s',
            (supplier_warehouse[i], item_number[i]))
        # Step 5b
        adjusted_qty[i] = s_quantity - quantity[i]
        # Step 5c
        if adjusted_qty[i] < 10:
            adjusted_qty[i] += 100
        # Step 5d
        utils.do_query(
            session,
            'UPDATE stock SET S_QUANTITY = %s WHERE S_W_ID = %s AND S_I_ID = %s',
            (adjusted_qty[i], supplier_warehouse[i], item_number[i]))
        utils.do_query(
            session, '''
            UPDATE stock_counters
            SET S_YTD_CHANGE = S_YTD_CHANGE + %s,
                S_ORDER_CNT_CHANGE = S_ORDER_CNT_CHANGE + 1
            WHERE S_W_ID = %s AND S_I_ID = %s
            ''', (quantity[i], supplier_warehouse[i], item_number[i]))
        if supplier_warehouse[i] != w_id:
            utils.do_query(
                session, '''
                UPDATE stock_counters SET S_REMOTE_CNT_CHANGE = S_REMOTE_CNT_CHANGE + 1
                WHERE S_W_ID = %s AND S_I_ID = %s
                ''', (supplier_warehouse[i], item_number[i]))
        # Step 5e
        i_price = utils.single_select(
            session, 'SELECT I_PRICE FROM item WHERE I_ID = %s',
            (item_number[i], ))
        item_amount[i] = quantity[i] * i_price
        # Step 5f (the total) is computed after all threads finished, see
        # below.
        # Step 5g
        # A column name cannot be bound as a query parameter, so only the
        # name is formatted into the statement; the values are bound.
        dist_name = 'S_DIST_' + str(d_id)
        dist_info = utils.single_select(
            session,
            'SELECT {} FROM stock WHERE S_W_ID = %s AND S_I_ID = %s'.format(
                dist_name),
            (supplier_warehouse[i], item_number[i]))
        utils.do_query(
            session, '''
            INSERT INTO order_line (OL_O_ID, OL_D_ID, OL_W_ID, OL_NUMBER, OL_I_ID, OL_SUPPLY_W_ID, OL_QUANTITY, OL_AMOUNT, OL_DIST_INFO)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ''', (n, d_id, w_id, i, item_number[i], supplier_warehouse[i],
                  quantity[i], item_amount[i], dist_info))
        # Populate the item_orders table for each item-order pair
        utils.do_query(session,
                       cql_insert_item_orders,
                       (w_id, item_number[i], n, d_id, c_id),
                       query_type='write')

    pool = ThreadPool(8)
    try:
        pool.map(handle_item, range(num_items))
    finally:
        # Release the worker threads even when one of the items failed.
        pool.close()
        pool.join()

    # Step 5f: summing here instead of doing `total += ...` on the worker
    # threads avoids a lost-update race on the shared accumulator.
    total_amount = sum(item_amount)

    # Create order after creating all order-lines, so that when querying for popular items there will be no error
    # The entry date stored here is the same value reported in the output.
    utils.do_query(
        session, '''
        INSERT INTO orders (O_ID, O_D_ID, O_W_ID, O_C_ID, O_ENTRY_D, O_OL_CNT, O_ALL_LOCAL)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ''', (n, d_id, w_id, c_id, current_datetime, num_items, all_local))

    # Step 6
    w_tax = utils.single_select(session,
                                'SELECT W_TAX FROM warehouse WHERE W_ID = %s',
                                (w_id, ))
    d_tax = utils.single_select(
        session, 'SELECT D_TAX FROM district WHERE D_W_ID = %s AND D_ID = %s',
        (w_id, d_id))
    c_discount = utils.single_select(
        session,
        'SELECT C_DISCOUNT FROM customer WHERE C_W_ID = %s AND C_D_ID = %s AND C_ID = %s',
        (w_id, d_id, c_id))
    total_amount *= (1 + d_tax + w_tax) * (1 - c_discount)

    # Output
    output = {}
    output['w_id'] = w_id
    output['d_id'] = d_id
    output['c_id'] = c_id
    rows = utils.do_query(
        session,
        'SELECT C_LAST, C_CREDIT, C_DISCOUNT FROM customer WHERE C_W_ID = %s AND C_D_ID = %s AND C_ID = %s',
        (w_id, d_id, c_id))
    # Only the first returned customer row is used.
    for row in rows:
        output['c_last'] = row.c_last
        output['c_credit'] = row.c_credit
        output['c_discount'] = row.c_discount
        break
    output['w_tax'] = w_tax
    output['d_tax'] = d_tax
    output['o_id'] = n
    output['o_entry_d'] = current_datetime
    output['num_items'] = num_items
    output['total_amount'] = total_amount
    output['item_infos'] = []
    for i in range(num_items):
        i_name = utils.single_select(
            session, 'SELECT I_NAME FROM item WHERE I_ID = %s',
            (item_number[i], ))
        output['item_infos'].append(
            (item_number[i], i_name, supplier_warehouse[i], quantity[i],
             item_amount[i], adjusted_qty[i]))
    return output

Exemple #48

0

Afficher le fichier

                    my_dandelion = Dandelion()
                    my_dandelion.doi = my_dict["doi"]
                    my_dandelion.num_events = my_dict["num_events"]
                    db.session.add(my_dandelion)
                    # but don't append it; it doesn't need to get run

            safe_commit(db)

            print "now calling dandelion"

            use_multithreaded = False
            if use_multithreaded:
                my_thread_pool = ThreadPool(50)
                results = my_thread_pool.imap_unordered(
                    call_dandelion_on_article, my_dandelions)
                my_thread_pool.close()
                my_thread_pool.join()
                my_thread_pool.terminate()

            else:
                results = []
                for my_dandelion in my_dandelions:
                    results.append(call_dandelion_on_article(my_dandelion))

            try:
                for (my_result, my_error, rate_limit_exceeded) in results:
                    if rate_limit_exceeded:
                        print "sleeping for a few minutes because rate_limit_exceeded", my_error
                        sleep(60 * 5)
            except Exception as e:
                print e

Exemple #49

0

Afficher le fichier

def multi_thread_do_job(l, func=do_job, size=threads_number):
    """
    Apply `func` to every element of `l` on a pool of `size` threads.

    :param l: iterable of job inputs.
    :param func: callable applied to each element (defaults to `do_job`).
    :param size: number of worker threads (defaults to `threads_number`).
    :return: list of results, in the same order as `l`.
    """
    tp = ThreadPool(size)
    try:
        return tp.map(func, l)
    finally:
        # Always release the worker threads, also when a job raised and
        # `map` propagates that exception.
        tp.close()
        tp.join()

Exemple #50

0

Afficher le fichier

Fichier : sensor_defs.py Projet : nicolemariegraf/sebastian-punting-gait

def _tripod_reading_ok(async_result, side):
    """Return True when the collected reading is usable.

    A reading is rejected, and an error naming `side` is printed, when
    fetching it raises or when it contains a 0.0 value.
    """
    try:
        usable = 0.0 not in async_result.get()
    except Exception:
        usable = False
    if not usable:
        print('Error in ' + side + ' tripod sensor(s)')
    return usable


def initialize_sensor(sensor_type):
    """
    Open the serial port(s) of a sensor and retry until it delivers data.

    Each attempt opens the port(s), collects one set of data and, on
    failure, closes the port(s), waits one second and starts over. There is
    no retry limit.

    :param sensor_type: type of sensor; currently supported: "MLX90393",
        "BNO055".
    :return: `[ser1, ser2]` (the two open serial ports) for "MLX90393", the
        single open serial port for "BNO055", None for any other type.
    """
    if sensor_type == 'MLX90393':
        while True:
            # Set up serial communication with Arduino(s)
            ser1 = serial.Serial('/dev/ttyUSB0', 115200, timeout=2)
            ser1.flushInput()

            try:
                ser2 = serial.Serial('/dev/ttyUSB1', 115200, timeout=2)
            except Exception:
                # Do not leave the first port open when the second one
                # cannot be opened.
                ser1.close()
                raise
            ser2.flushInput()

            # Read both boards in parallel and wait for both to finish.
            pool = ThreadPool(processes=2)
            collectData_result1 = pool.apply_async(collectData,
                                                   ('MLX90393', 9, ser1))
            collectData_result2 = pool.apply_async(collectData,
                                                   ('MLX90393', 9, ser2))
            pool.close()
            pool.join()

            # Check both sides unconditionally so each failing side is
            # reported.
            left_ok = _tripod_reading_ok(collectData_result1, 'left')
            right_ok = _tripod_reading_ok(collectData_result2, 'right')

            if left_ok and right_ok:
                print('Initialized magnetometers.')
                time.sleep(1)
                return [ser1, ser2]

            print(
                'Failed to initialize one or more magnetometers, trying again...'
            )
            ser1.close()
            ser2.close()
            time.sleep(1)

    elif sensor_type == 'BNO055':
        while True:
            # Set up serial communication with Arduino(s)
            ser3 = serial.Serial('/dev/ttyUSB2', 115200, timeout=2)
            ser3.flushInput()

            try:
                # Only success or failure matters here; the data itself is
                # discarded.
                collectData('BNO055', 6, ser3)
            except Exception:
                # `Exception` rather than a bare except, so Ctrl+C can
                # still break out of the retry loop.
                print('Failed to initialize IMU, trying again...')
                ser3.close()
                time.sleep(1)
            else:
                print('Initialized IMU.')
                time.sleep(1)
                return ser3

Exemple #51

0

Afficher le fichier

    def process_callbacks(self, callback_collection, kwargs):
        """
        Processes a collection of callbacks or hooks for a particular event, namely pre, hook or
        post.

        The functions are passed in as an array to ``callback_collection``. For each function,
        process callbacks first ensures that it has the correct arguments available to it; if
        not, an Exception is raised (functions handled before the failing one have already
        been run or dispatched). Then, depending on whether Threading is enabled or not, the
        function is either run immediately, or loaded into a ThreadPool and executed
        asynchronously.

        The returned local and global updates are either collected and processed sequentially, as
        in the case of the non-threaded behaviour, or collected at the end of the
        callback_collection processing and handled there.

        Note:
            It is impossible to predict the order of the functions being run. If the order is
            important, it is advised to create a second event hook that will be fired before the
            other. Rigger has no concept of hook or callback order and is unlikely to ever have.

        Args:
            callback_collection: A list of functions to call.
            kwargs: A set of kwargs to pass to the functions.

        Returns: A tuple of local and global namespace updates.

        Raises:
            Exception: if a function requires arguments that are found neither in the global
                data nor in ``kwargs``.
        """
        loc_collect = {}
        glo_collect = {}
        results_list = []
        pool = ThreadPool(10) if self._threaded else None
        try:
            for cb in callback_collection:
                # An argument counts as required when its default is a type
                required_args = [
                    sig for sig in cb['args']
                    if isinstance(cb['args'][sig].default, type)
                ]
                missing = list(
                    set(required_args).difference(set(
                        self.global_data.keys())).difference(set(kwargs.keys())))
                if missing:
                    raise Exception('Function {} is missing kwargs {}'.format(
                        cb['func'].__name__, missing))

                new_kwargs = self.build_kwargs(cb['args'], kwargs)
                if pool is not None:
                    results_list.append(
                        pool.apply_async(cb['func'], [], new_kwargs))
                else:
                    obtain_result = self.handle_results(
                        cb['func'], [], new_kwargs)
                    loc_collect, glo_collect = self.handle_collects(
                        obtain_result, loc_collect, glo_collect)
        finally:
            # Always shut the pool down, also when the loop above raised:
            # otherwise its threads would be left running.
            if pool is not None:
                pool.close()
                pool.join()

        # Only populated in threaded mode; every task has finished by now.
        for result in results_list:
            obtain_result = self.handle_results(result.get, [], {})
            loc_collect, glo_collect = self.handle_collects(
                obtain_result, loc_collect, glo_collect)
        return loc_collect, glo_collect

Exemple #52

0

Afficher le fichier

Fichier : task.py Projet : kaestli/mediatorws

class StationXMLNetworkCombinerTask(CombinerTask):
    """
    Task downloading and combining `StationXML
    <http://www.fdsn.org/xml/station/>`_ information for a network element.
    Downloading is performed concurrently.

    :param list routes: Routes to combine. Must belong to exclusively a single
        network code.

    .. note::

        *StationXML* :code:`BaseNodeType` elements by definition
        (http://www.fdsn.org/xml/station/fdsn-station-1.0.xsd) are ordered
        using :code:`<xs:sequence></sequence>`. This fact is used when merging
        StationXML :code:`BaseNodeType` elements.

    """
    # TODO(damb): The combiner has to write metadata to the log database.
    # Also in case of errors.
    # Besides of processors this combiner has to log since it is the instance
    # collecting and analyzing DownloadTask results.

    LOGGER = 'flask.app.federator.task_combiner_stationxml'

    POOL_SIZE = 5

    NETWORK_TAG = settings.STATIONXML_ELEMENT_NETWORK
    STATION_TAG = settings.STATIONXML_ELEMENT_STATION
    CHANNEL_TAG = settings.STATIONXML_ELEMENT_CHANNEL

    def __init__(self, routes, query_params, **kwargs):
        """
        :param list routes: Routes to combine; all stream epochs of all
            routes must share one network code
        :param query_params: Query parameters; the ``level`` entry (default
            ``'station'``) selects how results are combined
        :param kwargs: Forwarded to the parent task constructor
        :raises ValueError: If the routes span more than one network code (or
            none at all)
        """
        nets = {se.network for route in routes for se in route.streams}

        # TODO(damb): Use assert instead
        if len(nets) != 1:
            raise ValueError('Routes must belong exclusively to a single '
                             'network code.')

        super().__init__(routes, query_params, logger=self.LOGGER, **kwargs)
        self._level = self.query_params.get('level', 'station')

        self._network_elements = []
        self.path_tempfile = None

    def _clean(self, result):
        """
        Remove the temporary file referenced by :code:`result.data` unless
        the task is configured to keep temporary files.

        Removal is best-effort: a failing :code:`os.remove` is logged and
        otherwise ignored.
        """
        if not result.data:
            return
        if self._keep_tempfiles in (KeepTempfiles.ALL,
                                    KeepTempfiles.ON_ERRORS):
            return

        # only announce the removal when it is actually attempted
        self.logger.debug('Removing temporary file {!r} ...'.format(
            result.data))
        try:
            os.remove(result.data)
        except OSError as err:
            self.logger.debug(
                'Failed to remove temporary file {!r}: {}'.format(
                    result.data, err))

    def _run(self):
        """
        Combine `StationXML <http://www.fdsn.org/xml/station/>`_
        :code:`<Network></Network>` information.

        For every route a download task is applied asynchronously to a thread
        pool. Results are consumed as they become ready and merged into the
        list of known network epoch elements according to the requested
        level. Finally, the combined network elements are serialized to a
        temporary file.

        :returns: :code:`Result.nocontent` if no download delivered data,
            else :code:`Result.ok` referencing the temporary file
        :raises MissingContextLock: If a context became inactive while
            results were pending or right after the tempfile was written
        """
        self.logger.info('Executing task {!r} ...'.format(self))
        self._pool = ThreadPool(processes=self._num_workers)

        for route in self._routes:
            self.logger.debug(
                'Creating DownloadTask for route {!r} ...'.format(route))
            ctx = Context()
            self._ctx.append(ctx)

            t = RawDownloadTask(GranularFdsnRequestHandler(
                route.url, route.streams[0], query_params=self.query_params),
                                decode_unicode=True,
                                context=ctx,
                                keep_tempfiles=self._keep_tempfiles,
                                http_method=self._http_method)

            # apply DownloadTask asynchronoulsy to the worker pool
            result = self._pool.apply_async(t)

            self._results.append(result)

        self._pool.close()

        # <Station></Station> children are ignored while looking up the
        # matching <Network></Network> epoch; the tag list is loop-invariant
        sta_exclude_tags = [
            '{}{}'.format(ns, self.STATION_TAG)
            for ns in settings.STATIONXML_NAMESPACES
        ]
        # for 'channel'/'response' stations are merged (down to channels);
        # for 'station' they are simply appended
        merge_stations = self._level in ('channel', 'response')

        # fetch results ready
        # NOTE(review): this loop polls the pending results without sleeping
        # or blocking in between passes.
        while True:
            ready = []
            for result in self._results:
                if not result.ready():
                    continue

                _result = result.get()
                if _result.status_code == 200:
                    if merge_stations or self._level == 'station':
                        for _net_element in self._extract_net_elements(
                                _result.data):

                            # find the correct <Network></Network> epoch
                            # element
                            net_element, known = self._emerge_net_element(
                                _net_element,
                                exclude_tags=sta_exclude_tags)

                            # an unknown network element was registered as
                            # a whole (including its stations)
                            if not known:
                                continue

                            for sta_element in self._emerge_sta_elements(
                                    _net_element):
                                if merge_stations:
                                    # merge <Channel></Channel> elements
                                    # into <Station></Station>
                                    self._merge_sta_element(
                                        net_element, sta_element)
                                else:
                                    # NOTE(damb): <Station></Station>
                                    # elements defined by multiple EIDA
                                    # nodes are simply appended; no merging
                                    # is performed
                                    net_element.append(sta_element)

                    elif self._level == 'network':
                        for net_element in self._extract_net_elements(
                                _result.data):
                            self._emerge_net_element(net_element)

                    self._clean(_result)
                    self._sizes.append(_result.length)

                else:
                    self._handle_error(_result)
                    self._sizes.append(0)

                ready.append(result)

            for result in ready:
                self._results.remove(result)

            if not self._results:
                break

            if self._has_inactive_ctx():
                self.logger.debug('{}: Closing ...'.format(self.name))
                self._terminate()
                raise self.MissingContextLock

        self._pool.join()

        if not sum(self._sizes):
            self.logger.warning(
                'Task {!r} terminates with no valid result.'.format(self))
            return Result.nocontent(extras={'type_task': self._TYPE})

        _length = 0
        # dump xml tree for <Network></Network> epochs to temporary file
        self.path_tempfile = get_temp_filepath()
        self.logger.debug('{}: tempfile={!r}'.format(self, self.path_tempfile))
        with open(self.path_tempfile, 'wb') as ofd:
            for net_element in self._network_elements:
                s = etree.tostring(net_element)
                _length += len(s)
                ofd.write(s)

        if self._has_inactive_ctx():
            raise self.MissingContextLock

        self.logger.info(
            ('Task {!r} sucessfully finished '
             '(total bytes processed: {}, after processing: {}).').format(
                 self, sum(self._sizes), _length))

        return Result.ok(data=self.path_tempfile,
                         length=_length,
                         extras={'type_task': self._TYPE})

    def _emerge_net_element(self, net_element, exclude_tags=None):
        """
        Emerge a :code:`<Network></Network>` epoch element. If the
        :code:`<Network></Network>` element is unknown it is automatically
        appended to the list of already existing network elements.

        :param net_element: Emerge a network epoch element
        :type net_element: :py:class:`lxml.etree.Element`
        :param list exclude_tags: List of child element tags to be excluded
            while comparing. :code:`None` (default) excludes nothing.
        :returns: Tuple of :code:`net_element` or a reference to an already
            existing network epoch element and a boolean value if the network
            element already is known (:code:`True`) else :code:`False`
        :rtype: tuple
        """
        # avoid a shared mutable default argument
        if exclude_tags is None:
            exclude_tags = []

        for existing_net_element in self._network_elements:
            if elements_equal(net_element,
                              existing_net_element,
                              exclude_tags,
                              recursive=True):
                return existing_net_element, True

        self._network_elements.append(net_element)
        return net_element, False

    def _emerge_sta_elements(self,
                             net_element,
                             namespaces=settings.STATIONXML_NAMESPACES):
        """
        Generator function emerging :code:`<Station><Station>` elements from
        :code:`<Network></Network>` tree.

        :param net_element: Network epoch `StationXML
        <http://www.fdsn.org/xml/station/>`_ element
        :type net_element: :py:class:`lxml.etree.Element`
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        """
        for ns in namespaces:
            tag = '{}{}'.format(ns, self.STATION_TAG)
            for sta_element in net_element.findall(tag):
                yield sta_element

    def _emerge_cha_elements(self,
                             sta_element,
                             namespaces=settings.STATIONXML_NAMESPACES):
        """
        Generator function emerging :code:`<Channel><Channel>` elements from
        :code:`<Station></Station>` tree.

        :param sta_element: Station epoch element to search
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        """
        for ns in namespaces:
            tag = '{}{}'.format(ns, self.CHANNEL_TAG)
            for cha_element in sta_element.findall(tag):
                yield cha_element

    def _extract_net_elements(self,
                              path_xml,
                              namespaces=settings.STATIONXML_NAMESPACES):
        """
        Extract :code:`<Network></Network>` epoch elements from `StationXML
        <http://www.fdsn.org/xml/station/>`_.

        :param str path_xml: Path to `StationXML
            <http://www.fdsn.org/xml/station/>`_ file.
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        :returns: List of network epoch elements found in the file
        :rtype: list
        """
        network_tags = [
            '{}{}'.format(ns, self.NETWORK_TAG) for ns in namespaces
        ]

        with open(path_xml, 'rb') as ifd:
            station_xml = etree.parse(ifd).getroot()
            return list(station_xml.iter(*network_tags))

    def _merge_sta_element(self,
                           net_element,
                           sta_element,
                           namespaces=settings.STATIONXML_NAMESPACES):
        """
        Merges a *StationXML* :code:`<Station></Station>` epoch element into a
        :code:`<Network></Network>` epoch element. Merging is performed
        recursively down to :code:`<Channel><Channel>` epochs.

        :param net_element: Network epoch element receiving the station
        :param sta_element: Station epoch element to be merged
        :param list namespaces: List of XML namespaces to be taken into
            consideration.
        """
        cha_exclude_tags = [
            '{}{}'.format(ns, self.CHANNEL_TAG) for ns in namespaces
        ]

        # XXX(damb): Check if <Station></Station> epoch element is already
        # available - if not simply append.
        for _sta_element in net_element.iterfind(sta_element.tag):
            if elements_equal(sta_element,
                              _sta_element,
                              exclude_tags=cha_exclude_tags,
                              recursive=False):
                # XXX(damb): Channels are ALWAYS appended; no merging is
                # performed
                for _cha_element in self._emerge_cha_elements(
                        sta_element, namespaces):
                    _sta_element.append(_cha_element)
                break

        else:
            # no equal station epoch found
            net_element.append(sta_element)

Exemple #53

0

Afficher le fichier

 def pings(self, netlocs):
     """Ping every network location concurrently.

     One worker thread is used per entry of ``netlocs``; each entry is
     passed to ``self.ping``.

     Args:
         netlocs: Sized sequence of network locations.

     Returns:
         The result of ``zip(netlocs, results)``, pairing each network
         location with the value returned by ``self.ping`` for it.
     """
     if len(netlocs) == 0:
         # ThreadPool requires at least one worker; nothing to ping anyway
         return zip(netlocs, [])

     pool = ThreadPool(processes=len(netlocs))
     try:
         rt_secs = pool.map(self.ping, netlocs, chunksize=1)
     finally:
         # release the worker threads even if a ping raised
         pool.close()
         pool.join()
     return zip(netlocs, rt_secs)

Exemple #54

0

Afficher le fichier

def cross_validation(params, n_processes):
    """Run a grid-search cross validation for the "h-rnn-rnn" model.

    The function overwrites a fixed set of attributes on ``params``, builds
    all hyper-parameter combinations with ``ParameterGrid`` and evaluates
    each combination in a thread pool. Averaged metrics are printed per
    combination, followed by the best value for every metric. Garbage
    collection is disabled while the function runs and always re-enabled
    afterwards.

    Args:
        params: Parameter object; attributes are assigned on it in place.
        n_processes: Number of worker threads of the pool.

    Returns:
        None. Results are only printed.
    """
    gc.disable()
    try:
        params.model_architecture = "h-rnn-rnn"
        params.predictions_filename = 'predictions.txt'
        params.eval_batch_size = 2
        params.predict_batch_size = 2
        params.save_trans_params = True
        params.ckpt = None
        # Other
        params.gpu = None
        params.random_seed = None
        params.log_device_placement = False
        params.timeline = False
        # optimizer
        params.learning_rate = 0.01
        params.optimizer = 'adam'
        params.colocate_gradients_with_ops = True
        params.start_decay_step = 0
        params.decay_steps = 10000
        params.decay_factor = 0.98
        params.max_gradient_norm = 5.0
        # training
        params.batch_size = 2
        params.num_epochs = 10
        params.num_ckpt_epochs = 1
        # network
        params.init_op = 'uniform'
        params.init_weight = 0.1
        params.uttr_time_major = False
        params.sess_time_major = False
        params.input_emb_trainable = True
        params.out_bias = True
        params.forget_bias = 1.0
        params.connect_inp_to_out = False
        params.uttr_activation = "relu"
        params.sess_activation = "relu"
        # cnn
        params.filter_sizes = '3,4'
        params.num_filters = 10
        params.pool_size = 1
        params.padding = 'valid'
        params.stride = 1
        #network
        params.uttr_layers = 1
        params.sess_layers = 1
        params.uttr_rnn_type = 'uni'
        params.sess_rnn_type = 'uni'
        params.uttr_unit_type = 'gru'
        params.sess_unit_type = 'gru'
        params.uttr_pooling = 'last'
        params.uttr_attention_size = 32
        params.input_emb_size = 300
        params.out_dir = 'experiments/out_model/splits'
        params.n_classes = 27
        params.hparams_path = None
        # What symbols to use for unk and pad.
        params.unk = '<unk>'
        params.pad = '<pad>'
        params.feature_size = 12624
        params.data_folder = 'experiments/data/splits'
        params.n_jobs = 6

        nn_params = {
            "uttr_units": [20, 50],
            "sess_units": [None],
            "uttr_hid_to_out_dropout": [2],
            "sess_hid_to_out_dropout": [None, 10, 20]
        }
        param_combs = list(ParameterGrid(nn_params))
        print("\n")
        print("Run Cross validation for model %s and %d param combinations." %
              (params.model_architecture, len(param_combs)))
        print("\n")

        loss_cv, acc_cv, f1_cv, pr_cv, rc_cv = [], [], [], [], []

        def cross_validate_comb(params, tr_val_labels, comb, i):
            """Evaluate one combination; returns (metrics dict, index)."""
            print("Run cross validation for params: %s" % comb)
            # NOTE(review): every worker thread writes these attributes onto
            # the same shared ``params`` object, so concurrently running
            # combinations can overwrite each other's settings - confirm
            # and consider passing a per-combination copy.
            params.uttr_units = comb["uttr_units"]
            params.sess_units = comb["sess_units"]
            params.uttr_hid_to_out_dropout = comb["uttr_hid_to_out_dropout"]
            params.sess_hid_to_out_dropout = comb["sess_hid_to_out_dropout"]
            avg_loss, avg_acc, avg_f1, avg_pr, avg_rc = src.train.train.cross_validate_helper.run_cross_validate(
                params, tr_val_labels)
            results = {
                "avg_loss": avg_loss,
                "avg_acc": avg_acc,
                "avg_f1": avg_f1,
                "avg_pr": avg_pr,
                "avg_rc": avg_rc,
            }
            return results, i

        def save_async_result_to_list(result, i, result_list):
            result_list[i] = result

        def callback_error(result):
            print('error', result)

        pool = ThreadPool(processes=n_processes)
        # one slot per combination; a slot stays None if its task failed
        results = [None] * len(param_combs)
        start_time_cv = time.time()
        for i, comb in enumerate(param_combs):
            try:
                # NOTE(review): ``tr_val_labels`` is not defined in this
                # function; presumably a module-level name - confirm.
                pool.apply_async(cross_validate_comb,
                                 args=(params, tr_val_labels, comb, i),
                                 callback=lambda result: save_async_result_to_list(
                                     result[0], result[1], results),
                                 error_callback=callback_error)
            except Exception as e:
                traceback.print_tb(e.__traceback__)
        pool.close()
        pool.join()
        print("Cross validation finished in %f secs" %
              (time.time() - start_time_cv))

        # parameter combinations that actually delivered metrics, kept
        # parallel to the metric lists so the best-index lookups stay aligned
        scored_combs = []
        for i, res in enumerate(results):
            if res is None:
                print("No result for comb %d (params %s); skipping." %
                      (i, param_combs[i]))
                continue
            print("Loss for comb %d: %.3f" % (i, res["avg_loss"]))
            print("Accuracy score for comb %d: %.3f" % (i, res["avg_acc"]))
            print("F1 score for comb %d: %.3f" % (i, res["avg_f1"]))
            print("Precision for comb %d: %.3f" % (i, res["avg_pr"]))
            print("Recall for comb %d: %.3f" % (i, res["avg_rc"]))
            loss_cv.append(res["avg_loss"])
            acc_cv.append(res["avg_acc"])
            f1_cv.append(res["avg_f1"])
            pr_cv.append(res["avg_pr"])
            rc_cv.append(res["avg_rc"])
            scored_combs.append(param_combs[i])

        if not scored_combs:
            print("Cross validation produced no results.")
            return

        # the best loss is the smallest one
        loss_min_idx = np.argmin(loss_cv)
        acc_max_idx = np.argmax(acc_cv)
        f1_max_idx = np.argmax(f1_cv)
        pr_max_idx = np.argmax(pr_cv)
        rc_max_idx = np.argmax(rc_cv)

        print("Min Loss score: %.3f for params %s" %
              (loss_cv[loss_min_idx], scored_combs[loss_min_idx]))
        print("Max Accuracy score: %.3f for params %s" %
              (acc_cv[acc_max_idx], scored_combs[acc_max_idx]))
        print("Max F1 score: %.3f for params %s" %
              (f1_cv[f1_max_idx], scored_combs[f1_max_idx]))
        print("Max Precision: %.3f for params %s" %
              (pr_cv[pr_max_idx], scored_combs[pr_max_idx]))
        print("Max Recall: %.3f for params %s" %
              (rc_cv[rc_max_idx], scored_combs[rc_max_idx]))
    finally:
        # re-enable garbage collection even if the run failed
        gc.enable()

Exemple #55

0

Afficher le fichier

    def get_routemanagers(self):
        """Build a route manager for every configured area.

        For each entry of the raw JSON ``areas`` list the geofence files are
        validated, a mode-specific route manager is created and, for every
        mode but ``iv_mitm``, fed with coordinates. Route calculation is
        dispatched to a thread pool and awaited before returning.

        Returns:
            dict: Maps the area name to a dict with the keys ``mode``,
            ``geofence_included``, ``geofence_excluded``, ``routecalc`` and
            ``routemanager``.

        Raises:
            RuntimeError: If an area has ``geofence_included`` set to None.
            SystemExit: If a geofence file does not exist or the mode is
                invalid.
        """
        from multiprocessing.pool import ThreadPool
        global mode_mapping

        # returns list of routemanagers with area IDs
        areas = {}
        area_arr = self.__raw_json["areas"]

        thread_pool = ThreadPool(processes=4)

        areas_procs = {}
        for area in area_arr:
            if area["geofence_included"] is None:
                raise RuntimeError("Cannot work without geofence_included")

            geofence_included = Path(area["geofence_included"])
            if not geofence_included.is_file():
                log.error("Geofence included file configured does not exist")
                sys.exit(1)

            geofence_excluded_raw_path = area.get("geofence_excluded", None)
            if geofence_excluded_raw_path is not None:
                geofence_excluded = Path(geofence_excluded_raw_path)
                if not geofence_excluded.is_file():
                    log.error("Geofence excluded specified but does not exist")
                    sys.exit(1)

            mode = area["mode"]
            init = area.get("init", False)
            display_name = area.get("name", "unknown")
            area_settings = area.get("settings", None)

            area_dict = {
                "mode": mode,
                "geofence_included": area["geofence_included"],
                "geofence_excluded": geofence_excluded_raw_path,
                "routecalc": area["routecalc"]
            }
            # also build a routemanager for each area...

            # grab coords
            # first check if init is false or raids_ocr is set as mode, if so, grab the coords from DB
            # coords = np.loadtxt(area["coords"], delimiter=',')
            geofence_helper = GeofenceHelper(
                area["geofence_included"], geofence_excluded_raw_path)

            # build routemanagers
            # NOTE: mode_mapping is only looked up inside the branches that
            # need it, so modes without a mapping entry are not touched.
            if mode in ("raids_ocr", "raids_mitm"):
                route_manager = RouteManagerRaids(
                    self.db_wrapper,
                    None,
                    mode_mapping[mode]["range"],
                    mode_mapping[mode]["max_count"],
                    area["geofence_included"],
                    geofence_excluded_raw_path,
                    area["routecalc"],
                    mode=mode,
                    settings=area_settings,
                    init=init,
                    name=display_name)
            elif mode == "mon_mitm":
                route_manager = RouteManagerMon(
                    self.db_wrapper,
                    None,
                    mode_mapping[mode]["range"],
                    mode_mapping[mode]["max_count"],
                    area["geofence_included"],
                    geofence_excluded_raw_path,
                    area["routecalc"],
                    mode=mode,
                    coords_spawns_known=area.get("coords_spawns_known", False),
                    init=init,
                    name=display_name,
                    settings=area_settings)
            elif mode == "iv_mitm":
                route_manager = RouteManagerIV(
                    self.db_wrapper,
                    None,
                    0,
                    999999,
                    area["geofence_included"],
                    geofence_excluded_raw_path,
                    area["routecalc"],
                    name=display_name,
                    settings=area_settings,
                    mode=mode)
            elif mode == "pokestops":
                route_manager = RouteManagerMon(
                    self.db_wrapper,
                    None,
                    mode_mapping[mode]["range"],
                    mode_mapping[mode]["max_count"],
                    area["geofence_included"],
                    geofence_excluded_raw_path,
                    area["routecalc"],
                    mode=mode,
                    init=init,
                    name=display_name,
                    settings=area_settings)
            else:
                log.error("Invalid mode found in mapping parser.")
                sys.exit(1)

            if mode != "iv_mitm":
                if mode == "raids_ocr" or init is False:
                    # grab data from DB depending on mode
                    # TODO: move routemanagers to factory
                    if mode in ("raids_ocr", "raids_mitm"):
                        coords = self.db_wrapper.gyms_from_db(geofence_helper)
                    elif mode == "mon_mitm":
                        spawn_known = area.get("coords_spawns_known", False)
                        if spawn_known:
                            log.info("Reading known Spawnpoints from DB")
                            coords = self.db_wrapper.get_detected_spawns(
                                geofence_helper)
                        else:
                            log.info("Reading unknown Spawnpoints from DB")
                            coords = self.db_wrapper.get_undetected_spawns(
                                geofence_helper)
                    elif mode == "pokestops":
                        coords = self.db_wrapper.stops_from_db(geofence_helper)
                    else:
                        log.fatal("Mode not implemented yet: %s" % str(mode))
                        # use sys.exit like the rest of the method; the
                        # builtin exit() is only injected by the site module
                        sys.exit(1)
                else:
                    # calculate all level N cells (mapping back from mapping above linked to mode)
                    # coords = S2Helper.get_s2_cells_from_fence(geofence=geofence_helper,
                    #                                           cell_size=mode_mapping[mode]["s2_cell_level"])
                    coords = S2Helper._generate_locations(
                        mode_mapping[mode]["range"], geofence_helper)

                route_manager.add_coords_list(coords)
                max_radius = mode_mapping[mode]["range"]
                max_count_in_radius = mode_mapping[mode]["max_count"]
                if not init:
                    log.info("Calculating route for %s" % str(display_name))
                    proc = thread_pool.apply_async(route_manager.recalc_route,
                                                   args=(max_radius,
                                                         max_count_in_radius,
                                                         0, False))
                    areas_procs[area["name"]] = proc
                else:
                    log.info(
                        "Init mode enabled and more than 400 coords in init. Going row-based for %s"
                        % str(display_name))
                    # we are in init, let's write the init route to file to make it visible in madmin
                    if area["routecalc"] is not None:
                        routefile = area["routecalc"]
                        if os.path.isfile(routefile + '.calc'):
                            os.remove(routefile + '.calc')
                        with open(routefile + '.calc', 'a') as f:
                            for loc in coords:
                                f.write(
                                    str(loc.lat) + ', ' + str(loc.lng) + '\n')
                    # gotta feed the route to routemanager... TODO: without recalc...
                    proc = thread_pool.apply_async(route_manager.recalc_route,
                                                   args=(1, 99999999, 0,
                                                         False))
                    areas_procs[area["name"]] = proc
            # log.error("Calculated route, appending another coord and recalculating")

            area_dict["routemanager"] = route_manager
            areas[area["name"]] = area_dict

        # wait for every dispatched route calculation; get() re-raises any
        # exception raised inside the worker
        for to_be_checked in areas_procs.values():
            log.debug(to_be_checked)
            to_be_checked.get()

        thread_pool.close()
        thread_pool.join()
        return areas

Exemple #56

0

Afficher le fichier

Fichier : uploader.py Projet : zhongpan/conan

class CmdUpload(object):
    """ This class is responsible for uploading packages to remotes. The flow is:
    - Collect all the data from the local cache:
        - Collect the refs that match the given pattern: "_collects_refs_to_upload"
        - Collect, for every ref, all the binary IDs that have to be uploaded:
          "_collect_packages_to_upload". This may discard binaries that do not
          belong to the current RREV
        This collection step handles the interactivity (asking the user yes/no),
        the errors (don't upload packages with build_policy="always") and the
        computation of the full REVISIONS for everything that has to be uploaded.
        No remote API calls are done in this step, everything is local
    - Execute the upload. For every ref:
        - Upload the recipe of the ref: "_upload_recipe"
            - If not FORCE, check the date "_check_recipe_date", i.e. if there are
              changes, do not allow uploading if the remote date is newer than the
              local cache one
            - Retrieve the sources (exports_sources), if they are not cached, when
              uploading to a different remote. "complete_recipe_sources"
            - Gather files and create 2 .tgz (exports, exports_sources) with
              "_compress_recipe_files"
            - Decide which files have to be uploaded and deleted from the server
              based on the difference with the remote snapshot "_recipe_files_to_upload"
              This can raise if the upload policy is not overwrite
            - Execute the real transfer "remote_manager.upload_recipe()"
        - For every package_id of every ref: "_upload_package"
            - Gather files and create package.tgz. "_compress_package_files"
            - (Optional) Do the integrity check of the package
            - Decide which files to upload and delete from the server:
              "_package_files_to_upload". Can raise if policy is NOT overwrite
            - Do the actual upload

    All the REVISIONS are locally defined, not retrieved from servers

    This requires calling the remote API methods:
    - get_recipe_sources() to get the export_sources if they are missing
    - get_recipe_snapshot() to do the diff and know what files to upload
    - get_package_snapshot() to do the diff and know what files to upload
    - get_recipe_manifest() to check the date and raise if the policy requires it
    - get_package_manifest() to raise if policy!=force and the manifests differ
    """
    def __init__(self, cache, user_io, remote_manager, loader, hook_manager):
        """Store the collaborators and initialise the upload bookkeeping.

        The output used for messages is a ``ProgressOutput`` wrapping
        ``user_io.out``.
        """
        # Collaborators handed in by the caller
        self._cache, self._user_io = cache, user_io
        self._remote_manager, self._loader = remote_manager, loader
        self._hook_manager = hook_manager
        self._output = progress_bar.ProgressOutput(user_io.out)

        # Bookkeeping: no pool yet, no errors recorded yet
        self._upload_thread_pool = None
        self._exceptions_list = []

    def upload(self,
               reference_or_pattern,
               remotes,
               upload_recorder,
               package_id=None,
               all_packages=None,
               confirm=False,
               retry=None,
               retry_wait=None,
               integrity_check=False,
               policy=None,
               query=None,
               parallel_upload=False):
        t1 = time.time()
        refs, confirm = self._collects_refs_to_upload(package_id,
                                                      reference_or_pattern,
                                                      confirm)
        refs_by_remote = self._collect_packages_to_upload(
            refs, confirm, remotes, all_packages, query, package_id)

        if parallel_upload:
            self._upload_thread_pool = ThreadPool(8)
            self._user_io.disable_input()
        else:
            self._upload_thread_pool = ThreadPool(1)

        for remote, refs in refs_by_remote.items():
            self._output.info("Uploading to remote '{}':".format(remote.name))

            def upload_ref(ref_conanfile_prefs):
                _ref, _conanfile, _prefs = ref_conanfile_prefs
                self._upload_ref(_conanfile, _ref, _prefs, retry, retry_wait,
                                 integrity_check, policy, remote,
                                 upload_recorder, remotes)

            self._upload_thread_pool.map(upload_ref,
                                         [(ref, conanfile, prefs)
                                          for (ref, conanfile, prefs) in refs])
            self._upload_thread_pool.close()
            self._upload_thread_pool.join()
            for exception in self._exceptions_list:
                self._output.error(str(exception))

            if len(self._exceptions_list) > 0:
                raise ConanException("Errors uploading some packages")

        logger.debug("UPLOAD: Time manager upload: %f" % (time.time() - t1))

    def _collects_refs_to_upload(self, package_id, reference_or_pattern,
                                 confirm):
        """Validate the inputs and compute the refs (without revisions) to upload.

        Returns a ``(refs, confirm)`` tuple. When a single reference is given
        (or a package_id is requested) ``confirm`` is forced to True;
        otherwise the pattern is searched in the cache and ``confirm`` is
        returned untouched.
        """
        if package_id and not check_valid_ref(reference_or_pattern,
                                              strict_mode=False):
            raise ConanException(
                "-p parameter only allowed with a valid recipe reference, "
                "not with a pattern")

        single_reference = package_id or check_valid_ref(reference_or_pattern)
        if not single_reference:
            # Pattern: look the matching recipes up in the local cache
            found = search_recipes(self._cache, reference_or_pattern)
            if not found:
                raise NotFoundException(
                    ("No packages found matching pattern '%s'" %
                     reference_or_pattern))
            return found, confirm

        # Upload package
        ref = ConanFileReference.loads(reference_or_pattern)
        if ref.revision and not self._cache.config.revisions_enabled:
            raise ConanException(
                "Revisions not enabled in the client, specify a "
                "reference without revision")
        return [ref], True

    def _collect_packages_to_upload(self, refs, confirm, remotes, all_packages,
                                    query, package_id):
        """ compute the references with revisions and the package_ids to be uploaded
        """
        # Group recipes by remote
        refs_by_remote = defaultdict(list)

        for ref in refs:
            metadata = self._cache.package_layout(ref).load_metadata()
            if ref.revision and ref.revision != metadata.recipe.revision:
                raise ConanException(
                    "Recipe revision {} does not match the one stored in the cache {}"
                    .format(ref.revision, metadata.recipe.revision))
            ref = ref.copy_with_rev(metadata.recipe.revision)
            remote = remotes.selected
            if remote:
                ref_remote = remote
            else:
                ref_remote = metadata.recipe.remote
                ref_remote = remotes.get_remote(ref_remote)

            upload = True
            if not confirm:
                msg = "Are you sure you want to upload '%s' to '%s'?" % (
                    str(ref), ref_remote.name)
                upload = self._user_io.request_boolean(msg)
            if upload:
                try:
                    conanfile_path = self._cache.package_layout(
                        ref).conanfile()
                    conanfile = self._loader.load_basic(conanfile_path)
                except NotFoundException:
                    raise NotFoundException(
                        ("There is no local conanfile exported as %s" %
                         str(ref)))

                # TODO: This search of binary packages has to be improved, more robust
                # So only real packages are retrieved
                if all_packages or query:
                    if all_packages:
                        query = None
                    # better to do a search, that will retrieve real packages with ConanInfo
                    # Not only "package_id" folders that could be empty
                    package_layout = self._cache.package_layout(
                        ref.copy_clear_rev())
                    packages = search_packages(package_layout, query)
                    packages_ids = list(packages.keys())
                elif package_id:
                    packages_ids = [
                        package_id,
                    ]
                else:
                    packages_ids = []
                if packages_ids:
                    if conanfile.build_policy == "always":
                        raise ConanException(
                            "Conanfile '%s' has build_policy='always', "
                            "no packages can be uploaded" % str(ref))
                prefs = []
                # Gather all the complete PREFS with PREV
                for package in packages_ids:
                    package_id, prev = package.split(
                        "#") if "#" in package else (package, None)
                    if package_id not in metadata.packages:
                        raise ConanException("Binary package %s:%s not found" %
                                             (str(ref), package_id))
                    if prev and prev != metadata.packages[package_id].revision:
                        raise ConanException(
                            "Binary package %s:%s#%s not found" %
                            (str(ref), package_id, prev))
                    # Filter packages that don't match the recipe revision
                    if self._cache.config.revisions_enabled and ref.revision:
                        rec_rev = metadata.packages[package_id].recipe_revision
                        if ref.revision != rec_rev:
                            self._output.warn(
                                "Skipping package '%s', it doesn't belong to the"
                                " current recipe revision" % package_id)
                            continue
                    package_revision = metadata.packages[package_id].revision
                    assert package_revision is not None, "PREV cannot be None to upload"
                    prefs.append(
                        PackageReference(ref, package_id, package_revision))
                refs_by_remote[ref_remote].append((ref, conanfile, prefs))

        return refs_by_remote

    def _upload_ref(self, conanfile, ref, prefs, retry, retry_wait,
                    integrity_check, policy, recipe_remote, upload_recorder,
                    remotes):
        """Upload the recipe identified by ref and queue its binaries.

        A ConanException raised while uploading the recipe is stored in
        ``self._exceptions_list`` and nothing else is done for this ref.
        Without binaries the "post_upload" hook runs right away; with
        binaries it runs from the pool callback, once per package that was
        uploaded without a ConanException.
        """
        assert (ref.revision
                is not None), "Cannot upload a recipe without RREV"
        conanfile_path = self._cache.package_layout(ref).conanfile()
        # FIXME: I think it makes no sense to specify a remote to "pre_upload"
        # FIXME: because the recipe can have one and the package a different one
        self._hook_manager.execute("pre_upload",
                                   conanfile_path=conanfile_path,
                                   reference=ref,
                                   remote=recipe_remote)
        banner = "\rUploading %s to remote '%s'" % (str(ref),
                                                    recipe_remote.name)
        self._output.info(left_justify_message(banner))
        try:
            self._upload_recipe(ref, conanfile, retry, retry_wait, policy,
                                recipe_remote, remotes)
            upload_recorder.add_recipe(ref, recipe_remote.name,
                                       recipe_remote.url)
        except ConanException as recipe_error:
            self._exceptions_list.append(recipe_error)
            return

        if not prefs:
            # Recipe-only upload: nothing is queued, finish here
            # FIXME: I think it makes no sense to specify a remote to "post_upload"
            # FIXME: because the recipe can have one and the package a different one
            self._hook_manager.execute("post_upload",
                                       conanfile_path=conanfile_path,
                                       reference=ref,
                                       remote=recipe_remote)
            return

        # Now the binaries
        package_count = len(prefs)
        p_remote = recipe_remote

        def upload_package_index(index_pref):
            # Returns (conanfile_path, ref, remote, None) on success and
            # (None, None, None, exception) on a ConanException
            try:
                position, pref = index_pref
                up_msg = "\rUploading package %d/%d: %s to '%s'" % (
                    position + 1, package_count, str(pref.id), p_remote.name)
                self._output.info(left_justify_message(up_msg))
                self._upload_package(pref, retry, retry_wait,
                                     integrity_check, policy, p_remote)
                upload_recorder.add_package(pref, p_remote.name, p_remote.url)
                return conanfile_path, ref, recipe_remote, None
            except ConanException as package_error:
                return None, None, None, package_error

        def upload_package_callback(ret):
            for cf_path, r_ref, r_rem, error in ret:
                if error is not None:
                    self._exceptions_list.append(error)
                    continue
                # FIXME: I think it makes no sense to specify a remote to "post_upload"
                # FIXME: because the recipe can have one and the package a different one
                self._hook_manager.execute("post_upload",
                                           conanfile_path=cf_path,
                                           reference=r_ref,
                                           remote=r_rem)

        # This doesn't wait for the packages to end, so the function returns
        # and the "pool entry" for the recipe is released
        # NOTE(review): no error callback is registered, so an exception other
        # than ConanException raised by a package upload looks like it would
        # go unreported -- confirm.
        self._upload_thread_pool.map_async(upload_package_index,
                                           list(enumerate(prefs)),
                                           callback=upload_package_callback)

    def _upload_recipe(self, ref, conanfile, retry, retry_wait, policy, remote,
                       remotes):
        """Compress and upload the recipe ``ref`` to ``remote``; returns ``ref``.

        Runs the "pre_upload_recipe"/"post_upload_recipe" hooks around the
        transfer, stores the checksums of the compressed files in the
        metadata and, when the recipe had no remote assigned, records
        ``remote.name`` as its remote. With the SKIP policy it returns right
        after the checks, before anything is transferred.

        Raises:
            ConanException: when the policy is not FORCE and the recipe has
                scm 'url'/'revision' set to "auto" (other checks performed by
                the helpers may raise as well).
        """
        current_remote_name = self._cache.package_layout(
            ref).load_metadata().recipe.remote

        # Sources are only completed when uploading to a different remote
        if remote.name != current_remote_name:
            complete_recipe_sources(self._remote_manager, self._cache,
                                    conanfile, ref, remotes)

        conanfile_path = self._cache.package_layout(ref).conanfile()
        hook_args = dict(conanfile_path=conanfile_path,
                         reference=ref,
                         remote=remote)
        self._hook_manager.execute("pre_upload_recipe", **hook_args)

        started = time.time()
        the_files = self._compress_recipe_files(ref)

        with self._cache.package_layout(ref).update_metadata() as metadata:
            metadata.recipe.checksums = calc_files_checksum(the_files)

        local_manifest = FileTreeManifest.loads(
            load(the_files["conanmanifest.txt"]))

        remote_manifest = None
        if policy != UPLOAD_POLICY_FORCE:
            # Check SCM data for auto fields
            if hasattr(conanfile, "scm"):
                if (conanfile.scm.get("url") == "auto"
                        or conanfile.scm.get("revision") == "auto"):
                    raise ConanException(
                        "The recipe has 'scm.url' or 'scm.revision' with 'auto' "
                        "values. Use '--force' to ignore this error or export again "
                        "the recipe ('conan export' or 'conan create') in a "
                        "repository with no-uncommitted changes or by "
                        "using the '--ignore-dirty' option")

            remote_manifest = self._check_recipe_date(ref, remote,
                                                      local_manifest)
        if policy == UPLOAD_POLICY_SKIP:
            return ref

        files_to_upload, deleted = self._recipe_files_to_upload(
            ref, policy, the_files, remote, remote_manifest, local_manifest)

        if not (files_to_upload or deleted):
            self._output.info("Recipe is up to date, upload skipped")
        else:
            self._remote_manager.upload_recipe(ref, files_to_upload, deleted,
                                               remote, retry, retry_wait)
            self._upload_recipe_end_msg(ref, remote)
        elapsed = time.time() - started
        log_recipe_upload(ref, elapsed, the_files, remote.name)
        self._hook_manager.execute("post_upload_recipe", **hook_args)

        # The recipe wasn't in the registry or it has changed the revision field only
        if not current_remote_name:
            with self._cache.package_layout(ref).update_metadata() as metadata:
                metadata.recipe.remote = remote.name

        return ref

    def _upload_package(self,
                        pref,
                        retry=None,
                        retry_wait=None,
                        integrity_check=False,
                        policy=None,
                        p_remote=None):
        """Compress and upload the binary package ``pref`` to ``p_remote``.

        Runs the "pre_upload_package"/"post_upload_package" hooks around the
        transfer and stores the checksums of the compressed files in the
        metadata. Returns None with the SKIP policy (nothing is transferred
        and the post hook is not run), otherwise ``pref``.
        """
        assert (pref.revision
                is not None), "Cannot upload a package without PREV"
        assert (pref.ref.revision
                is not None), "Cannot upload a package without RREV"

        conanfile_path = self._cache.package_layout(pref.ref).conanfile()
        hook_args = dict(conanfile_path=conanfile_path,
                         reference=pref.ref,
                         package_id=pref.id,
                         remote=p_remote)
        self._hook_manager.execute("pre_upload_package", **hook_args)

        started = time.time()
        the_files = self._compress_package_files(pref, integrity_check)

        with self._cache.package_layout(
                pref.ref).update_metadata() as metadata:
            metadata.packages[pref.id].checksums = calc_files_checksum(
                the_files)

        if policy == UPLOAD_POLICY_SKIP:
            return None
        files_to_upload, deleted = self._package_files_to_upload(
            pref, policy, the_files, p_remote)

        if not (files_to_upload or deleted):
            self._output.info("Package is up to date, upload skipped")
        else:
            self._remote_manager.upload_package(pref, files_to_upload, deleted,
                                                p_remote, retry, retry_wait)
            logger.debug("UPLOAD: Time upload package: %f" %
                         (time.time() - started))

        elapsed = time.time() - started
        log_package_upload(pref, elapsed, the_files, p_remote)
        self._hook_manager.execute("post_upload_package", **hook_args)

        logger.debug("UPLOAD: Time uploader upload_package: %f" %
                     (time.time() - started))

        # Remember the remote only if the package did not have one yet
        stored = self._cache.package_layout(pref.ref).load_metadata()
        cur_package_remote = stored.packages[pref.id].remote
        if not cur_package_remote and policy != UPLOAD_POLICY_SKIP:
            with self._cache.package_layout(
                    pref.ref).update_metadata() as metadata:
                metadata.packages[pref.id].remote = p_remote.name

        return pref

    def _compress_recipe_files(self, ref):
        """Gather the exported recipe files of ``ref`` and compress them.

        Any export tgz marked as dirty is removed first. Returns whatever the
        module-level ``_compress_recipe_files`` helper returns.

        Raises:
            ConanException: if the conanfile or the manifest is missing from
                the export folder.
        """
        export_folder = self._cache.package_layout(ref).export()

        for tgz_name in (EXPORT_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME):
            tgz_path = os.path.join(export_folder, tgz_name)
            if not is_dirty(tgz_path):
                continue
            self._output.warn("%s: Removing %s, marked as dirty" %
                              (str(ref), tgz_name))
            os.remove(tgz_path)
            clean_dirty(tgz_path)

        files, symlinks = gather_files(export_folder)
        if not (CONANFILE in files and CONAN_MANIFEST in files):
            raise ConanException("Cannot upload corrupted recipe '%s'" %
                                 str(ref))

        export_src_folder = self._cache.package_layout(ref).export_sources()
        src_files, src_symlinks = gather_files(export_src_folder)
        return _compress_recipe_files(files, symlinks, src_files,
                                      src_symlinks, export_folder,
                                      self._output)

    def _compress_package_files(self, pref, integrity_check):
        """Gather the files of the binary package ``pref`` and compress them.

        A package tgz marked as dirty is removed first; when
        ``integrity_check`` is true the package manifest is verified before
        compressing. Returns whatever the module-level
        ``_compress_package_files`` helper returns.

        Raises:
            ConanException: if the package folder is dirty, or the conaninfo
                or manifest file is missing.
        """
        started = time.time()
        # existing package, will use short paths if defined
        package_folder = self._cache.package_layout(
            pref.ref, short_paths=None).package(pref)

        if is_dirty(package_folder):
            raise ConanException("Package %s is corrupted, aborting upload.\n"
                                 "Remove it with 'conan remove %s -p=%s'" %
                                 (pref, pref.ref, pref.id))

        tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
        if is_dirty(tgz_path):
            self._output.warn("%s: Removing %s, marked as dirty" %
                              (str(pref), PACKAGE_TGZ_NAME))
            os.remove(tgz_path)
            clean_dirty(tgz_path)

        # Get all the files in that directory
        files, symlinks = gather_files(package_folder)

        if not (CONANINFO in files and CONAN_MANIFEST in files):
            logger.error("Missing info or manifest in uploading files: %s" %
                         (str(files)))
            raise ConanException("Cannot upload corrupted package '%s'" %
                                 str(pref))

        logger.debug("UPLOAD: Time remote_manager build_files_set : %f" %
                     (time.time() - started))
        if integrity_check:
            self._package_integrity_check(pref, files, package_folder)
            logger.debug(
                "UPLOAD: Time remote_manager check package integrity : %f" %
                (time.time() - started))

        return _compress_package_files(files, symlinks, package_folder,
                                       self._output)

    def _recipe_files_to_upload(self, ref, policy, the_files, remote,
                                remote_manifest, local_manifest):
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_recipe_snapshot(ref, remote)
        files_to_upload = {
            filename.replace("\\", "/"): path
            for filename, path in the_files.items()
        }
        if not remote_snapshot:
            return files_to_upload, set()

        deleted = set(remote_snapshot).difference(the_files)
        if policy != UPLOAD_POLICY_FORCE:
            if remote_manifest is None:
                # This is the weird scenario, we have a snapshot but don't have a manifest.
                # Can be due to concurrency issues, so we can try retrieve it now
                try:
                    remote_manifest, _ = self._remote_manager.get_recipe_manifest(
                        ref, remote)
                except NotFoundException:
                    # This is weird, the manifest still not there, better upload everything
                    self._output.warn(
                        "The remote recipe doesn't have the 'conanmanifest.txt' "
                        "file and will be uploaded: '{}'".format(ref))
                    return files_to_upload, deleted

            if remote_manifest == local_manifest:
                return None, None

            if policy in (UPLOAD_POLICY_NO_OVERWRITE,
                          UPLOAD_POLICY_NO_OVERWRITE_RECIPE):
                raise ConanException(
                    "Local recipe is different from the remote recipe. "
                    "Forbidden overwrite.")

        return files_to_upload, deleted

    def _package_files_to_upload(self, pref, policy, the_files, remote):
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_package_snapshot(
            pref, remote)

        if remote_snapshot and policy != UPLOAD_POLICY_FORCE:
            if not is_package_snapshot_complete(remote_snapshot):
                return the_files, set([])
            remote_manifest, _ = self._remote_manager.get_package_manifest(
                pref, remote)
            local_manifest = FileTreeManifest.loads(
                load(the_files["conanmanifest.txt"]))
            if remote_manifest == local_manifest:
                return None, None
            if policy == UPLOAD_POLICY_NO_OVERWRITE:
                raise ConanException(
                    "Local package is different from the remote package. Forbidden"
                    " overwrite.")
        deleted = set(remote_snapshot).difference(the_files)
        return the_files, deleted

    def _upload_recipe_end_msg(self, ref, remote):
        """Report that the recipe ``ref`` was uploaded, including the remote URL.

        A bintray API URL is shown as the corresponding bintray.com URL.
        """
        headline = "\rUploaded conan recipe '%s' to '%s'" % (str(ref),
                                                             remote.name)
        shown_url = remote.url.replace("https://api.bintray.com/conan",
                                       "https://bintray.com")
        self._output.info(left_justify_message(headline + ": %s" % shown_url))

    def _package_integrity_check(self, pref, files, package_folder):
        # If package has been modified remove tgz to regenerate it
        self._output.rewrite_line("Checking package integrity...")

        # short_paths = None is enough if there exist short_paths
        layout = self._cache.package_layout(pref.ref, short_paths=None)
        read_manifest, expected_manifest = layout.package_manifests(pref)

        if read_manifest != expected_manifest:
            self._output.writeln("")
            diff = read_manifest.difference(expected_manifest)
            for fname, (h1, h2) in diff.items():
                self._output.warn(
                    "Mismatched checksum '%s' (manifest: %s, file: %s)" %
                    (fname, h1, h2))

            if PACKAGE_TGZ_NAME in files:
                try:
                    tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
                    os.unlink(tgz_path)
                except Exception:
                    pass
            error_msg = os.linesep.join(
                "Mismatched checksum '%s' (manifest: %s, file: %s)" %
                (fname, h1, h2) for fname, (h1, h2) in diff.items())
            logger.error("Manifests doesn't match!\n%s" % error_msg)
            raise ConanException("Cannot upload corrupted package '%s'" %
                                 str(pref))
        else:
            self._output.rewrite_line("Package integrity OK!")
        self._output.writeln("")

    def _check_recipe_date(self, ref, remote, local_manifest):
        try:
            remote_recipe_manifest, ref = self._remote_manager.get_recipe_manifest(
                ref, remote)
        except NotFoundException:
            return  # First time uploading this package

        if (remote_recipe_manifest != local_manifest
                and remote_recipe_manifest.time > local_manifest.time):
            self._print_manifest_information(remote_recipe_manifest,
                                             local_manifest, ref, remote)
            raise ConanException(
                "Remote recipe is newer than local recipe: "
                "\n Remote date: %s\n Local date: %s" %
                (remote_recipe_manifest.time, local_manifest.time))

        return remote_recipe_manifest

    def _print_manifest_information(self, remote_recipe_manifest,
                                    local_manifest, ref, remote):
        try:
            self._output.info("\n%s" % ("-" * 40))
            self._output.info("Remote manifest:")
            self._output.info(remote_recipe_manifest)
            self._output.info("Local manifest:")
            self._output.info(local_manifest)
            difference = remote_recipe_manifest.difference(local_manifest)
            if "conanfile.py" in difference:
                contents = load(self._cache.package_layout(ref).conanfile())
                endlines = "\\r\\n" if "\r\n" in contents else "\\n"
                self._output.info("Local 'conanfile.py' using '%s' line-ends" %
                                  endlines)
                remote_contents = self._remote_manager.get_recipe_path(
                    ref, path="conanfile.py", remote=remote)
                endlines = "\\r\\n" if "\r\n" in remote_contents else "\\n"
                self._output.info(
                    "Remote 'conanfile.py' using '%s' line-ends" % endlines)
            self._output.info("\n%s" % ("-" * 40))
        except Exception as e:
            self._output.info("Error printing information about the diff: %s" %
                              str(e))

Exemple #57

0

Afficher le fichier

def index_images(paths,
                 aspect_ratio,
                 height,
                 width,
                 nchannels=3,
                 vectorization_scaling_factor=1,
                 index_class=faiss.IndexFlatL2,
                 verbose=1,
                 caching=True,
                 use_detect_faces=False,
                 nprocesses=4):
    """
    Load and vectorize a set of images in a thread pool, add the vectors to a
    lookup index and build a resized tile for every indexed image.

    @param: paths (list of Strings OR glob pattern string) image paths to load
    @param: aspect_ratio (float) height / width
    @param: height (int) desired height of tile images
    @param: width (int) desired width of tile images
    @param: nchannels (int) number of channels in image
    @param: vectorization_scaling_factor (float) the factor to multiply by for the vectorization
            values smaller than 1 will save memory space at the cost of quality of matches because the
            image will be downsized before vectorization
    @param: index_class (Faiss Index class) the ANN class to lookup codebook images with
    @param: verbose (int/bool) when truthy, print timing and progress messages
    @param: caching (bool) try to load a previously cached index first, and save the new
            one to disk after building it
    @param: use_detect_faces (bool) only keep images for which faces were detected
    @param: nprocesses (int) number of worker threads used to load the images

    @return: tuple (index, images, tile_images); (None, None, None) if face filtering
             leaves no images or if any exception is raised while indexing
    """
    try:
        # index our images
        vectorization_dimensionality = int(height * width * nchannels *
                                           vectorization_scaling_factor)
        index = index_class(vectorization_dimensionality)

        # create our pool and go!
        starttime = time.time()

        if isinstance(paths, str):
            # paths is a glob pattern like: 'images/blah/*.jpg'
            paths = glob.glob(paths)

        # should we retrieve a cached index?
        if caching:
            print("Caching is ON, checking for previously cached index...")
            cache = MosaicCacheConfig(paths=paths,
                                      height=height,
                                      width=width,
                                      nchannels=nchannels,
                                      index_class=index_class,
                                      dimensions=vectorization_dimensionality,
                                      detect_faces=use_detect_faces)
            cached = cache.load()
            if cached is not None:
                print("Found cached index, reading from disk...")
                return cached['index'], cached['images'], cached['tile_images']
            else:
                print("No cached index found, creating from scratch...")

        # nothing cached, let's index
        path_jobs = [(p, height, width, nchannels, aspect_ratio,
                      use_detect_faces) for p in paths]
        pool = ThreadPool(nprocesses)
        try:
            results = pool.map(load_and_vectorize_image, path_jobs)
        finally:
            # always release the worker threads, even when a job raised
            pool.close()
            pool.join()

        # how fast did we go?
        elapsed = time.time() - starttime
        if verbose:
            # guard against an empty path list so reporting cannot divide by zero
            per_image = elapsed / len(path_jobs) if path_jobs else 0.0
            print("Indexing: %d images, %.4f seconds (%.4f per image)" %
                  (len(path_jobs), elapsed, per_image))

        # get the results, store in ordered (indexed) list
        images = []
        vectors = []
        for image, vector in results:
            if image is None or vector is None:
                continue
            if use_detect_faces and not image.faces:
                # if we're told to use faces, skip any images
                # without them
                continue
            vectors.append(vector)
            images.append(image)

        if use_detect_faces:
            print("Using only images with faces: total=%d, withfaces=%d" %
                  (len(results), len(images)))

            if not images:
                print(
                    "No images contained faces :( Exiting and returning None's"
                )
                return None, None, None

        # create matrix and index
        matrix = np.array(vectors).reshape(-1, vectorization_dimensionality)
        index.add(matrix)

        # resize images to tiles
        if verbose:
            print("Resizing images to (%d, %d)..." % (height, width))
        tile_images = []
        for image in images:
            img = image.load_image()
            img_h, img_w, _ = img.shape
            # cv2.resize: fx scales the horizontal axis (width) and fy the
            # vertical axis (height), so each factor must use its own dimension
            tile = cv2.resize(img,
                              None,
                              fx=width / float(img_w),
                              fy=height / float(img_h),
                              interpolation=cv2.INTER_AREA)
            tile_images.append(tile)

        if caching:
            print("Caching index to disk...")
            cache.save(matrix, images, tile_images)

        return index, images, tile_images

    except Exception:
        # Best effort: report the failure and signal it through the return
        # value. No interactive debugger is started here, since that would
        # block non-interactive callers.
        import traceback
        print(traceback.format_exc())
        return None, None, None

Exemple #58

0

Afficher le fichier

Fichier : JuicerAdmin.py Projet : bserdar/juicer

        # Repos to create/update, sorted by environment.
        repo_objects_create = []
        repo_objects_update = {}
        for env in all_envs:
            repo_objects_update[env] = []

        # All repo defs as Repo objects
        all_repos = [JuicerRepo(repo['name'], repo_def=repo) for repo in repo_defs]

        # Detailed information on all existing repos
        existing_repos = {}
        repo_pool = ThreadPool()

        # Parallelize getting the repo lists
        env_results = [repo_pool.apply_async(self.list_repos, tuple(), kwds={'envs': [er]}, callback=existing_repos.update) for er in all_envs]
        repo_pool.close()

        for result_async in env_results:
            result_async.wait()

        repo_pool.join()

        for repo in all_repos:
            # 'env' is all environments if: 'env' is not defined; 'env' is an empty list
            current_env = repo.get('env', [])
            if current_env == []:
                juicer.utils.Log.log_debug("Setting 'env' to all_envs for repo: %s" % repo['name'])
                repo['env'] = all_envs

        #  Assemble a set of all specified environments.
        defined_envs = juicer.utils.unique_repo_def_envs(all_repos)

Exemple #59

0

Afficher le fichier

class Scheduler(MooseObject):
    """
    Base class for handling jobs asynchronously. To use this class, call .schedule()
    and supply a list of testers to schedule. Each group of testers supplied will begin
    running immediately.

    Syntax:
       .schedule([list of tester objects])

    A list of testers will be added to a queue and begin calling their derived run method.
    You can continue to add more testers to the queue in this fashion.

    Once you schedule all the testers you wish to test, call .waitFinish() to wait until
    all testers have finished.

    Two thread pools are used: run_pool executes the jobs (runWorker) and status_pool,
    limited to a single thread, reports their statuses (statusWorker).
    """
    @staticmethod
    def validParams():
        """ Return the parameters accepted by the Scheduler, on top of MooseObject's """
        params = MooseObject.validParams()
        params.addRequiredParam('average_load', 64.0, "Average load to allow")
        params.addRequiredParam('max_processes', None,
                                "Hard limit of maxium processes to use")

        return params

    # This is what will be checked for when we look for valid schedulers
    IS_SCHEDULER = True

    def __init__(self, harness, params):
        """
        Store the harness and its options, and create the thread pools, locks and
        counters used to schedule jobs.
        """
        MooseObject.__init__(self, harness, params)

        ## The test harness to run callbacks on
        self.harness = harness

        # Retrieve and store the TestHarness options for use in this object
        self.options = harness.getOptions()

        # The Scheduler class can be initialized with no "max_processes" argument and it'll default
        # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit.
        # The difference is whether or not we allow single jobs to exceed the number of slots.
        if params['max_processes'] is None:
            self.available_slots = 1
            self.soft_limit = True
        else:
            self.available_slots = params['max_processes']  # hard limit
            self.soft_limit = False

        # Requested average load level to stay below
        self.average_load = params['average_load']

        # The time the status queue reported no activity to the TestHarness
        self.last_reported = clock()

        # A set containing jobs that have been reported
        self.jobs_reported = set()

        # Initialize run_pool based on available slots
        self.run_pool = ThreadPool(processes=self.available_slots)

        # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered)
        self.status_pool = ThreadPool(processes=1)

        # Slot Lock when processing resource allocations
        self.slot_lock = threading.Lock()

        # DAG Lock when processing the DAG
        self.dag_lock = threading.Lock()

        # Workers in use (single job might request multiple slots)
        self.slots_in_use = 0

        # Jobs waiting to finish (includes actively running jobs)
        self.job_queue_count = 0

        # Set containing our Job containers. We use this in the event of a KeyboardInterrupt to
        # iterate over and kill any subprocesses
        self.tester_datas = set()

    def killRemaining(self):
        """
        Method to kill any running subprocess started by the Scheduler. This also
        closes the status pool to prevent further statuses from printing to the
        screen.
        """
        self.run_pool.close()
        self.status_pool.close()

        for tester_data in self.tester_datas:
            tester_data.killProcess()

        # Resetting the count lets waitFinish() stop waiting
        self.job_queue_count = 0

    def reportSkipped(self, jobs):
        """
        Allow derived schedulers to do something with skipped jobs
        """
        return

    def preLaunch(self, job_dag):
        """
        Allow derived schedulers to modify the DAG before jobs are launched
        """
        return

    def run(self, job_container):
        """ Call derived run method """
        return

    def postRun(self, job_container):
        """
        Allow derived schedulers to perform post run methods on job
        """
        return

    def cleanUp(self):
        """ Allow derived schedulers to perform cleanup operations """
        return

    def notifyFinishedSchedulers(self):
        """ Notify derived schedulers we are finished """
        return

    def skipPrereqs(self):
        """
        Method to return boolean to skip dependency prerequisites checks.
        """
        if self.options.ignored_caveats:
            if 'all' in self.options.ignored_caveats or 'prereq' in self.options.ignored_caveats:
                return True
        return False

    def processDownstreamTests(self, job_container):
        """
        Method to discover and delete downstream jobs due to supplied job failing.

        Returns the set of job containers removed from the DAG; each of their testers
        is given the 'skipped dependency' status.
        """
        with self.dag_lock:
            failed_job_containers = set()
            tester = job_container.getTester()
            job_dag = job_container.getDAG()
            if (tester.isFinished() and not tester.didPass() and not tester.isSilent() and not self.skipPrereqs()) \
               and not tester.isQueued() \
               or (self.options.dry_run and not tester.isSilent()):

                # Ask the DAG to delete and return the downstream jobs associated with this job
                failed_job_containers.update(
                    job_dag.delete_downstreams(job_container))

            for failed_job in failed_job_containers:
                failed_tester = failed_job.getTester()
                failed_tester.setStatus('skipped dependency',
                                        failed_tester.bucket_skip)

        return failed_job_containers

    def buildDAG(self, job_container_dict, job_dag):
        """
        Build the DAG and catch any failures.

        job_container_dict maps tester names to job containers. Returns the set of
        testers that are not runnable, have a failing/skipped dependency, or exhibit
        an output file race condition.
        """

        failed_or_skipped_testers = set()

        # Create DAG independent nodes
        for job_container in job_container_dict.values():
            tester = job_container.getTester()

            # Only runnable testers become nodes in the DAG
            if tester.getRunnable(self.options):
                job_dag.add_node_if_not_exists(job_container)
            else:
                failed_or_skipped_testers.add(tester)

        # Create edge nodes
        for job_container in job_container_dict.values():
            tester = job_container.getTester()

            # Add the prereq node and edges
            for prereq in tester.getPrereqs():

                try:
                    # Try to produce a KeyError and capture an unknown dependency
                    prereq_job = job_container_dict[prereq]

                    # Try to produce either a cyclic or skipped dependency error using the DAG's
                    # built-in exception methods
                    job_dag.add_edge(prereq_job, job_container)

                # Skipped Dependencies
                except dag.DAGEdgeIndError:
                    if not self.skipPrereqs():
                        if self.options.reg_exp:
                            tester.setStatus('dependency does not match re',
                                             tester.bucket_skip)
                        else:
                            tester.setStatus('skipped dependency',
                                             tester.bucket_skip)
                        failed_or_skipped_testers.add(tester)

                    # Add the parent node / dependency edge to create a functional DAG now that we have caught
                    # the skipped dependency (needed for discovering race conditions later on)
                    job_dag.add_node_if_not_exists(prereq_job)
                    job_dag.add_edge(prereq_job, job_container)

                # Cyclic Failure
                except dag.DAGValidationError:
                    tester.setStatus('Cyclic or Invalid Dependency Detected!',
                                     tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Unknown Dependency Failure
                except KeyError:
                    tester.setStatus('unknown dependency', tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Skipped/Silent/Deleted Testers fall into this category, caused by 'job_container' being skipped
                # during the first iteration above
                except dag.DAGEdgeDepError:
                    pass

        # With a working DAG created above (even a partial one), discover race conditions with remaining runnable
        # testers.
        failed_or_skipped_testers.update(self.checkRaceConditions(job_dag))

        return failed_or_skipped_testers

    def checkRaceConditions(self, dag_object):
        """
        Return a set of failing testers exhibiting race conditions with their
        output file.
        """
        failed_or_skipped_testers = set()

        # clone the dag so we can operate destructively on the cloned dag
        dag_clone = dag_object.clone()

        while dag_clone.size():
            output_files_in_dir = set()

            # Get a list of concurrent job containers
            concurrent_jobs = dag_clone.ind_nodes()

            for job_container in concurrent_jobs:
                tester = job_container.getTester()
                output_files = tester.getOutputFiles()

                # check if we have colliding output files
                if not output_files_in_dir.isdisjoint(output_files):

                    # Fail this concurrent group of testers
                    for this_job in concurrent_jobs:
                        failed_tester = this_job.getTester()
                        failed_tester.setStatus('OUTFILE RACE CONDITION',
                                                failed_tester.bucket_fail)
                        failed_or_skipped_testers.add(failed_tester)

                    # collisions detected, move on to the next set
                    break

                output_files_in_dir.update(output_files)

            # Delete this group of job containers and allow the loop to continue
            for job_container in concurrent_jobs:
                dag_clone.delete_node(job_container)

        return failed_or_skipped_testers

    def schedule(self, testers):
        """
        Schedule supplied list of testers for execution.

        Raises SchedulerError if the number of runnable plus non-runnable jobs does
        not match the number of testers supplied.
        """
        # If any threads caused an exception, we have already closed down the queue and need to
        # not schedule any more jobs
        # NOTE(review): relies on the private Pool._state attribute being falsy while the pool
        # is running -- confirm for the targeted Python version
        if self.run_pool._state:
            return

        # Instance the DAG class so we can share it amongst all the Job containers
        job_dag = dag.DAG()

        non_runnable_jobs = set()
        name_to_job_container = {}

        # Increment our simple queue count with the number of testers the scheduler received
        with self.slot_lock:
            self.job_queue_count += len(testers)

        # Create a local dictionary of tester names to job containers. Add this dictionary to a
        # set. We will use this set as a way to gain access to their methods.
        for tester in testers:
            name_to_job_container[tester.getTestName()] = Job(
                tester, job_dag, self.options)
            self.tester_datas.add(name_to_job_container[tester.getTestName()])

        # Populate job_dag with testers. This method will also return any testers which caused failures
        # while building the DAG.
        skipped_or_failed_testers = self.buildDAG(name_to_job_container,
                                                  job_dag)

        # Create a set of failing job containers
        for failed_tester in skipped_or_failed_testers:
            non_runnable_jobs.add(
                name_to_job_container[failed_tester.getTestName()])

        # Iterate over the jobs in our non_runnable_jobs and handle any downstream jobs affected by
        # 'job'. These will be our 'skipped dependency' tests.
        for job in non_runnable_jobs.copy():
            additionally_skipped = self.processDownstreamTests(job)
            non_runnable_jobs.update(additionally_skipped)
            job_dag.delete_node_if_exists(job)

        # Get a count of all the items still in the DAG. These will be the jobs that ultimately are queued
        runnable_jobs = job_dag.size()

        # Make sure we didn't drop a tester somehow
        if len(non_runnable_jobs) + runnable_jobs != len(testers):
            raise SchedulerError(
                'Runnable tests in addition to Skipped tests does not match total scheduled test count!'
            )

        # Inform derived schedulers of the jobs we are skipping immediately
        self.reportSkipped(non_runnable_jobs)

        # Assign a status thread to begin work on any skipped/failed jobs
        self.queueJobs(status_jobs=non_runnable_jobs)

        # Allow derived schedulers to modify the dag before we launch
        # TODO: We don't like this, and this will change when we move to better DAG handling.
        if runnable_jobs:
            self.preLaunch(job_dag)

        # Build our list of runnable jobs and set the tester's status to queued
        job_list = []
        if runnable_jobs:
            job_list = job_dag.ind_nodes()
            for job_container in job_list:
                tester = job_container.getTester()
                tester.setStatus('QUEUED', tester.bucket_pending)

        # Queue runnable jobs
        self.queueJobs(run_jobs=job_list)

    def waitFinish(self):
        """
        Block while the job queue is not empty. Once empty, this method will begin closing down
        the thread pools and perform a join. Once the last thread exits, we return from this
        method.

        There are two thread pools in play; the Tester pool which is performing all the tests,
        and the Status pool which is handling the printing of tester statuses. Because the
        Status pool will always have the last item needing to be 'printed', we close and join
        the Tester pool first, and then we do the same to the Status pool.
        """
        while self.job_queue_count > 0:
            sleep(0.5)

        self.run_pool.close()
        self.run_pool.join()
        self.status_pool.close()
        self.status_pool.join()

        # Notify derived schedulers we are exiting
        self.notifyFinishedSchedulers()

    def handleLongRunningJobs(self, job_container):
        """ Handle jobs that have not reported in allotted time """
        if job_container not in self.jobs_reported:
            tester = job_container.getTester()
            tester.setStatus('RUNNING...', tester.bucket_pending)
            self.queueJobs(status_jobs=[job_container])

            # Restart the reporting timer for this job
            job_container.report_timer = threading.Timer(
                float(tester.getMinReportTime()), self.handleLongRunningJobs,
                (job_container, ))

            job_container.report_timer.start()

    def handleTimeoutJobs(self, job_container):
        """ Handle jobs that have timed out """
        tester = job_container.getTester()
        tester.setStatus('TIMEOUT', tester.bucket_fail)
        job_container.killProcess()

    def getLoad(self):
        """ Method to return current load average """
        loadAverage = 0.0
        try:
            loadAverage = os.getloadavg()[0]
        except AttributeError:
            pass  # getloadavg() not available in this implementation of os
        return loadAverage

    def satisfyLoad(self):
        """ Method for controlling load average """
        while self.slots_in_use > 1 and self.getLoad() >= self.average_load:
            sleep(1.0)

    def reserveSlots(self, job_container):
        """
        Method which allocates resources to perform the job. Returns bool if job
        should be allowed to run based on available resources.
        """
        tester = job_container.getTester()

        # comply with load average
        if self.options.load:
            self.satisfyLoad()

        with self.slot_lock:
            can_run = False
            if self.slots_in_use + job_container.getProcessors(
            ) <= self.available_slots:
                can_run = True

            # Check for insufficient slots -soft limit
            # TODO: Create a unit test for this case
            elif job_container.getProcessors(
            ) > self.available_slots and self.soft_limit:
                tester.specs.addParam('caveats', ['OVERSIZED'], "")
                can_run = True

            # Check for insufficient slots -hard limit (skip this job)
            # TODO: Create a unit test for this case
            elif job_container.getProcessors(
            ) > self.available_slots and not self.soft_limit:
                tester.setStatus('insufficient slots', tester.bucket_skip)

            if can_run:
                self.slots_in_use += job_container.getProcessors()

        return can_run

    def getNextJobGroup(self, job_dag):
        """
        Prepare and return a list of concurrent runnable jobs
        """
        with self.dag_lock:
            next_job_list = []

            # Get concurrent available job list
            concurrent_jobs = job_dag.ind_nodes()

            for job_container in concurrent_jobs:
                tester = job_container.getTester()

                # Verify this job is not already running/pending/skipped
                if tester.isInitialized():
                    # Set this next new job to pending so as to prevent this job from being launched a second time
                    tester.setStatus('QUEUED', tester.bucket_pending)
                    next_job_list.append(job_container)

        return next_job_list

    def queueJobs(self, status_jobs=None, run_jobs=None):
        """
        Method to control which thread pool jobs enter.
        Syntax:

           To have a job(s) display its current status to the screen:
           .queueJobs(status_jobs=[job_container_list]

           To begin running job(s):
           .queueJobs(run_jobs=[job_container_list]

        Omitting an argument (or passing None) queues nothing for that pool. Jobs are
        not queued to a pool that has already been closed.
        """
        # None instead of a mutable default keeps the defaults from being shared between calls
        if run_jobs is None:
            run_jobs = ()
        if status_jobs is None:
            status_jobs = ()

        for job_container in run_jobs:
            if not self.run_pool._state:
                self.run_pool.apply_async(self.runWorker, (job_container, ))

        for job_container in status_jobs:
            if not self.status_pool._state:
                self.status_pool.apply_async(self.statusWorker,
                                             (job_container, ))

    def statusWorker(self, job_container):
        """ Method the status_pool calls when an available thread becomes ready """
        # Wrap entire statusWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()

            # If the job is still running for a long period of time and we have not reported
            # this same job already, report it now.
            if tester.isPending():
                if clock() - self.last_reported >= float(
                        tester.getMinReportTime(
                        )) and job_container not in self.jobs_reported:
                    # Inform the TestHarness of a long running test (RUNNING...)
                    self.harness.handleTestStatus(job_container)

                    # ...And then set the finished caveat now that the running status has printed
                    tester.specs.addParam('caveats', ['FINISHED'], "")

                    # Add this job to the reported container so it does not happen again
                    self.jobs_reported.add(job_container)

                # Job is 'Pending', but is under the threshold to be reported (return now so
                # last_reported time does not get updated). This will ensure that if nothing
                # has happened between 'now' and another occurrence of our thread timer event
                # we do report it.
                else:
                    return

            else:
                # All other statuses are sent unmolested
                self.harness.handleTestStatus(job_container)

            # Decrement the job queue count now that this job has finished
            if tester.isFinished():
                with self.slot_lock:
                    self.job_queue_count -= 1

            # Record current reported time only if it is an activity the user will see
            # NOTE(review): as written this is only False when the tester is both silent and
            # deleted -- confirm whether 'and' was intended
            if not tester.isSilent() or not tester.isDeleted():
                self.last_reported = clock()

        except Exception as e:
            print('statusWorker Exception: %s' % (e))
            self.killRemaining()

    def runWorker(self, job_container):
        """ Method the run_pool calls when an available thread becomes ready """
        # Wrap the entire runWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()
            # Check if there are enough resources to run this job
            if self.reserveSlots(job_container):

                # Long running timer
                job_container.report_timer = threading.Timer(
                    float(tester.getMinReportTime()),
                    self.handleLongRunningJobs, (job_container, ))

                # Timeout timer
                timeout_timer = threading.Timer(float(tester.getMaxTime()),
                                                self.handleTimeoutJobs,
                                                (job_container, ))

                try:
                    job_container.report_timer.start()
                    timeout_timer.start()

                    # Call the derived run method (blocking)
                    self.run(job_container)

                finally:
                    # Stop the timers whether the job finished on its own or raised. Otherwise
                    # the report timer keeps re-arming itself through handleLongRunningJobs
                    # and the timeout timer still fires after the failure.
                    job_container.report_timer.cancel()
                    timeout_timer.cancel()

                # Derived run needs to set a non-pending status of some sort.
                if tester.isPending():
                    raise SchedulerError(
                        'Derived Scheduler %s can not return a pending status!'
                        % (self.__class__))

                # Determine if this job creates any skipped dependencies (if it failed), and send
                # this new list of jobs to the status queue to be printed.
                possibly_skipped_job_containers = self.processDownstreamTests(
                    job_container)
                possibly_skipped_job_containers.add(job_container)
                self.queueJobs(status_jobs=possibly_skipped_job_containers)

                # Delete this job from the shared DAG while the DAG is locked
                with self.dag_lock:
                    job_dag = job_container.getDAG()
                    job_dag.delete_node(job_container)

                # Get next job list
                next_job_group = self.getNextJobGroup(job_dag)

                # Allow derived schedulers to perform post run operations
                self.postRun(job_container)

                # Recover worker count before attempting to queue more jobs
                with self.slot_lock:
                    self.slots_in_use = max(
                        0, self.slots_in_use - job_container.getProcessors())

                # Queue this new batch of runnable jobs
                self.queueJobs(run_jobs=next_job_group)

            # Not enough slots to run the job, currently
            else:
                # There will never be enough slots to run this job (insufficient slots)
                if tester.isFinished():
                    failed_downstream = self.processDownstreamTests(
                        job_container)
                    failed_downstream.add(job_container)
                    self.queueJobs(status_jobs=failed_downstream)

                # There are no available slots, currently. Place back in queue, and sleep for a bit
                else:
                    self.queueJobs(run_jobs=[job_container])
                    sleep(0.3)

        except Exception as e:
            print('runWorker Exception: %s' % (e))
            self.killRemaining()

Exemple #60

0

Afficher le fichier

def build_cache_maps(context, configurations, region, installed_region):
    """Build a cache of instances, volumes, and snapshots for one region.

    Instances matching each configuration are looked up through the EC2
    API and recorded locally; snapshot information for the volumes found
    is then gathered in chunks on a small thread pool.

    Args:
        context: Passed through to ``ebs_snapper.timeout_check`` to decide
            whether to stop scanning configurations early.
        configurations: Sequence of configuration dicts. Each one that
            passes ``validate_snapshot_settings`` is read via its
            ``'match'`` key.
        region: Region name used for the EC2 client and forwarded to
            ``chunk_volume_work``.
        installed_region: Not used by this function.

    Returns:
        dict: The cache, with the keys ``instance_id_to_data``,
        ``instance_id_to_config``, ``volume_id_to_instance_id``,
        ``snapshot_id_to_data``, ``volume_id_to_snapshot_count`` and
        ``volume_id_to_most_recent_snapshot_date``. All maps are empty
        when there are no configurations.
    """
    LOG.info("Building cache of instance, volume, and snapshots in %s", region)
    LOG.info("This may take a while...")
    cache_data = {
        # calculated here locally
        'instance_id_to_data': {},
        'instance_id_to_config': {},
        'volume_id_to_instance_id': {},

        # calculated w/ multiprocessing module
        'snapshot_id_to_data': {},
        'volume_id_to_snapshot_count': {},
        'volume_id_to_most_recent_snapshot_date': {},
    }

    # build an EC2 client, we're going to need it
    ec2 = boto3.client('ec2', region_name=region)

    if not configurations:
        LOG.info('No configurations found in %s, not building cache', region)
        return cache_data

    # populate them
    LOG.info("Retrieved %s DynamoDB configurations for caching",
             len(configurations))

    # build a list of any IDs (anywhere) that we should ignore
    ignore_ids = build_ignore_list(configurations)

    for config in configurations:
        # stop if we're running out of time; whatever was collected so far
        # is still processed below
        if ebs_snapper.timeout_check(context, 'build_cache_maps'):
            break

        # if it's missing the match section, ignore it
        if not validate_snapshot_settings(config):
            continue

        # build a boto3 filter to describe instances with
        configuration_matches = config['match']
        filters = convert_configurations_to_boto_filter(configuration_matches)

        # if we ended up with no boto3 filters, we bail so we don't snapshot everything
        if not filters:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            LOG.warning('Could not convert configuration match to a filter: %s',
                        configuration_matches)
            continue

        filters.append({
            'Name': 'instance-state-name',
            'Values': ['running', 'stopped']
        })
        instances = ec2.describe_instances(Filters=filters)
        res_list = instances.get('Reservations', [])
        random.shuffle(res_list)  # attempt to randomize order, for timeouts

        for reservation in res_list:
            inst_list = reservation.get('Instances', [])
            random.shuffle(
                inst_list)  # attempt to randomize order, for timeouts

            for instance_data in inst_list:
                instance_id = instance_data['InstanceId']

                # skip if we're ignoring this
                if instance_id in ignore_ids:
                    continue

                cache_data['instance_id_to_config'][instance_id] = config
                cache_data['instance_id_to_data'][instance_id] = instance_data
                for dev in instance_data.get('BlockDeviceMappings', []):
                    vid = dev['Ebs']['VolumeId']

                    # skip if we're ignoring this
                    if vid in ignore_ids:
                        continue

                    cache_data['volume_id_to_instance_id'][vid] = instance_id

    LOG.info("Retrieved %s instances for caching",
             len(cache_data['instance_id_to_data']))

    # look at each volume, get snapshots and count / most recent, and map to instance
    # list(...) rather than .keys()[:]: dict views cannot be sliced on Python 3
    process_volumes = list(cache_data['volume_id_to_instance_id'])
    LOG.info("Retrieved %s volumes for caching", len(process_volumes))

    # split the volume ids into lists of at most 25 for the worker threads
    chunk_size = 25
    chunked_work = [
        process_volumes[start:start + chunk_size]
        for start in range(0, len(process_volumes), chunk_size)
    ]

    LOG.debug('Split out volume work into %s lists, pulling snapshots...',
              len(chunked_work))

    if chunked_work:
        f = functools.partial(chunk_volume_work, region)
        pool = ThreadPool(processes=4)
        try:
            results = pool.map(f, chunked_work)
        finally:
            # always release the worker threads, even if a chunk raised
            pool.close()
            pool.join()

        keys = [
            'volume_id_to_most_recent_snapshot_date',
            'volume_id_to_snapshot_count', 'snapshot_id_to_data'
        ]
        # merge every chunk's partial maps into the shared cache
        for result_chunk in results:
            for k in keys:
                cache_data[k].update(result_chunk[k])

    LOG.info("Retrieved %s snapshots for caching",
             len(cache_data['snapshot_id_to_data']))

    return cache_data