Example #1
def run_trials():
    numTrials = 3000
    gens = 1000
    from multiprocessing.pool import ThreadPool as Pool
    pool = Pool(50)

    jids = pool.map(f,[gens]*numTrials)
    print "Done spawning trials. Retrieving results..."

    results = pool.map(cloud_result, jids)
    firstLocusFreqsHists = zeros((numTrials,gens+1), dtype='float')
    lastLocusFreqsHists = zeros((numTrials,gens+1), dtype='float')
    print "Done retrieving results. Press Enter to serialize..."

    input()

    for i, result in enumerate(results):
        firstLocusFreqsHists[i, :], lastLocusFreqsHists[i, :] = result

    with closing(FileStorage("soda_results.durus")) as durus:
        conn = Connection(durus)
        conn.get_root()[str(int(floor(time.time())))] = (firstLocusFreqsHists, lastLocusFreqsHists)
        conn.commit()

    pool.close()
    pool.join()
Example #2
def demo(args):
    """ Demonstrates the Python logging facility. """

    cli = argparse.ArgumentParser()
    cli.add_argument("--verbose", "-v", action='count', default=ENV_VERBOSITY)
    cli.add_argument("--quiet", "-q", action='count', default=0)
    args = cli.parse_args(args)

    level = verbosity_to_level(args.verbose - args.quiet)

    info("new log level: " + str(level))
    old_level = set_loglevel(level)
    info("old level was: " + str(old_level))

    info("printing some messages with different log levels")

    spam("rofl")
    dbg("wtf?")
    info("foo")
    warn("WARNING!!!!")
    err("that didn't go so well")
    crit("pretty critical, huh?")

    info("restoring old loglevel")

    set_loglevel(old_level)

    info("old loglevel restored")
    info("running some threaded stuff")

    pool = ThreadPool()
    for i in range(8):
        pool.apply_async(info, ("async message #" + str(i),))
    pool.close()
    pool.join()
Example #3
def update(args=None):
    projects = list_projects(False, args.dir)

    print("Update in progress...")

    if args.j:
        pool = Pool(args.j)

        def worker(p):
            if p.is_behind():
                p.update()
                print("{} updated".format(p.name))

        for p in projects:
            pool.apply_async(worker, (p,))

        pool.close()
        pool.join()
    else:
        for p in projects:
            if p.is_behind():
                p.update()
                print("{} updated".format(p.name))

    print("Update done")
Example #4
def download_person(person_url):
    print('start to download person %s\n' % person_url)
    person_pic_url = get_person_pic_url_Set(person_url)
    pool = ThreadPool(8)
    pool.map(download_pic, person_pic_url)
    pool.close()
    pool.join()
Example #5
def local_job_runner(cmds_list, num_threads, throw_error=True):
    """
    Execute a list of cmds locally using thread pool with at most
    num_threads threads, wait for all jobs to finish before exit.

    If throw_error is True, raise RuntimeError when any job fails.
    If throw_error is False, return a list of cmds that failed.

    Parameters:
      cmds_list - cmds that will be executed in the ThreadPool
      num_threads - number of threads that will be used in the ThreadPool
      throw_error - whether or not to raise RuntimeError when any cmd fails
    """
    # backticks() is an external helper; the failure check below assumes it returns (output, exit_code, ...)
    run_cmd_in_shell = lambda x: backticks(x, merge_stderr=True)
    try:
        pool = ThreadPool(processes=num_threads)
        rets = pool.map(run_cmd_in_shell, cmds_list)
        pool.close()
        pool.join()
    except subprocess.CalledProcessError:
        pass

    failed_cmds = [cmds_list[i] for i in range(0, len(cmds_list)) if rets[i][1] != 0]
    failed_cmds_out = [rets[i][0] for i in range(0, len(cmds_list)) if rets[i][1] != 0]

    if throw_error and len(failed_cmds) > 0:
        errmsg = "\n".join(["CMD failed: %s, %s" % (cmd, out)
                            for (cmd, out) in zip(failed_cmds, failed_cmds_out)])
        raise RuntimeError(errmsg)
    else:
        return failed_cmds
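
A minimal, self-contained sketch of the same pattern using only the standard library (the backticks() helper above is external to this snippet): run each command in a thread pool, capture output and exit code without raising, then report the failures.

from multiprocessing.pool import ThreadPool
import subprocess

def run_commands(cmds, num_threads=4):
    """Run shell commands concurrently; return the commands whose exit code was non-zero."""
    def run_one(cmd):
        # capture stdout+stderr together and never raise on a non-zero exit code
        proc = subprocess.run(cmd, shell=True,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return proc.stdout.decode(errors="replace"), proc.returncode

    pool = ThreadPool(processes=num_threads)
    try:
        results = pool.map(run_one, cmds)
    finally:
        pool.close()
        pool.join()
    return [cmd for cmd, (_, code) in zip(cmds, results) if code != 0]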
Example #6
def main(dir_path, outfile_path, is_journal=True):
    pn = 20
    flst = os.listdir(dir_path)
    arglst = []
    ret = dict()
    for i in range(pn):
        beg = int(math.ceil(float(len(flst)) / pn * i))
        end = int(math.ceil(float(len(flst)) / pn * (i + 1)))
        if i == 0:
            beg = 0
        if i == pn - 1:
            end = len(flst)
        arglst.append([dir_path, is_journal, beg, end, i, ret])
    pool = ThreadPool(pn)
    pool.map(job_map, arglst)
    pool.close()
    pool.join()
    print(80 * '=')
    print('[acmdl]: map finished')
    print(80 * '=')
    job_reduce(ret, outfile_path)
    print(80 * '=')
    print('[acmdl]: reduce finished')
    print(80 * '=')
    return
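
The begin/end arithmetic above splits the file list into pn nearly equal index ranges; a small sketch of the same calculation in isolation (a hypothetical helper, not part of the original module):

import math

def chunk_bounds(n_items, n_chunks):
    """Yield (begin, end) index pairs that split n_items into n_chunks nearly equal slices."""
    for i in range(n_chunks):
        beg = int(math.ceil(n_items / n_chunks * i))
        end = int(math.ceil(n_items / n_chunks * (i + 1)))
        yield beg, end

# e.g. list(chunk_bounds(10, 3)) -> [(0, 4), (4, 7), (7, 10)]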
Example #7
    def getMessagesBySource(self, source, batch_mode=False):
        """
        Returns the messages for the given source, including messages
        from the configured builder (if available) and static checks
        Extra arguments are
        """
        self._setupEnvIfNeeded()

        if self._USE_THREADS:
            records = []
            pool = ThreadPool()

            static_check = pool.apply_async(
                getStaticMessages, args=(source.getSourceContent().split('\n'), ))

            if self._isBuilderCallable():
                builder_check = pool.apply_async(self._getBuilderMessages,
                                                 args=[source, batch_mode])
                records += builder_check.get()

            records += static_check.get()

            pool.terminate()
            pool.join()
        else:
            records = getStaticMessages(source.getSourceContent().split('\n'))
            if self._isBuilderCallable():
                records += self._getBuilderMessages(source, batch_mode)

        self._saveCache()
        return records
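
A reduced sketch of the threaded branch above, assuming the check functions are passed in as callables: fire each check with apply_async, merge the results, and always tear the pool down.

from multiprocessing.pool import ThreadPool

def run_checks_in_parallel(checks, source_lines):
    """Run each check callable on source_lines concurrently and merge their record lists."""
    pool = ThreadPool()
    try:
        pending = [pool.apply_async(check, (source_lines,)) for check in checks]
        records = []
        for result in pending:
            records += result.get()   # re-raises any exception from the worker
    finally:
        pool.terminate()
        pool.join()
    return records
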
    def handle_noargs(self, **options):
        mimetypes.init()

        locked_print("===> Syncing static directory")
        pool = ThreadPool(20)

        # Sync every file in the static media dir with S3
        def pooled_sync_file(base, filename):
            pool.apply_async(self.sync_file, args=[base, filename])

        self.walk_tree([conf.SIMPLESTATIC_DIR], pooled_sync_file)
        pool.close()
        pool.join()
        locked_print("===> Static directory syncing complete")

        locked_print("===> Compressing and uploading CSS and JS")
        pool = ThreadPool(20)

        # Iterate over every template, looking for SimpleStaticNode
        def pooled_handle_template(base, filename):
            pool.apply_async(self.handle_template, args=[base, filename])

        self.walk_tree(list(settings.TEMPLATE_DIRS), pooled_handle_template)
        pool.close()
        pool.join()
        locked_print("===> Finished compressing and uploading CSS and JS")
Example #9
def get_for_genres(genres):
    genres = set(genres)
    playlists = {}
    new_genres = set()

    for page in range(5):
        args = []
        for g in genres:
            args.append((g, page))

        try:
            pool = ThreadPool(PROCESSES)
            pfunc = parse_page
            for i, res in enumerate(pool.imap_unordered(pfunc, args)):
                genre, page, pl, found = res
                print "%d/%d" % (i + 1, len(args))
                playlists.update(pl)
                new_genres |= found
                if not pl:
                    genres.remove(genre)
        except Exception as e:
            print(e)
            return playlists, []
        finally:
            pool.terminate()
            pool.join()

    return playlists, new_genres
    def _power_off_and_delete_all_vm_resources(self, api, reservation_details):
        resources = reservation_details.ReservationDescription.Resources

        pool = ThreadPool()
        async_results = []
        lock = Lock()
        message_status = {
            "power_off": False,
            "delete": False
        }

        for resource in resources:
            resource_details = api.GetResourceDetails(resource.Name)
            if resource_details.VmDetails:
                result_obj = pool.apply_async(self._power_off_or_delete_deployed_app,
                                              (api, resource_details, lock, message_status))
                async_results.append(result_obj)

        pool.close()
        pool.join()

        resource_to_delete = []
        for async_result in async_results:
            result = async_result.get()
            if result is not None:
                resource_to_delete.append(result)

        # delete resource - bulk
        if resource_to_delete:
            api.DeleteResources(resource_to_delete)
	def read(self, sftppath, localPath = None, numParallelConnections = 1):
		if localPath is None:
			localPath = os.getcwd() # local path - can be changed later
		sftp = paramiko.SFTPClient.from_transport(self.transport)
		if (numParallelConnections > 1):
			pool = ThreadPool(numParallelConnections)

		def getFile(sftppath, localpath):
			pconnection = SFTPConnection(self.connectionInfo)
			pconnection.connect()
			psftp = paramiko.SFTPClient.from_transport(pconnection.transport)
			psftp.get(sftppath, localpath)
			psftp.close()
			pconnection.close()

		def recursiveRead(sftp, sftppath, localPath):
			fileattr = sftp.lstat(sftppath)
			if not stat.S_ISDIR(fileattr.st_mode): #it is a file
				if (numParallelConnections > 1):
					pool.apply_async(getFile, args= (sftppath, os.path.join(localPath, os.path.basename(sftppath))))
				else:
					sftp.get(sftppath, os.path.join(localPath, os.path.basename(sftppath)))
			else: #it is a directory
				try: #creating local directory, using try-catch to handle race conditions
					os.makedirs(os.path.join(localPath, os.path.basename(sftppath)))
				except OSError as exception:
					if exception.errno != errno.EEXIST:
						raise
				for file in sftp.listdir_attr(sftppath):
					recursiveRead(sftp, os.path.join(sftppath, file.filename), os.path.join(localPath, os.path.basename(sftppath)))
		recursiveRead(sftp, sftppath, localPath)
		sftp.close()
		if (numParallelConnections > 1):
			pool.close()
			pool.join()
Example #12
    def _run_tests(self):
        "Runs the tests, produces no report."
        run_alone = []

        tests = self._tests
        pool = ThreadPool(self._worker_count)
        try:
            for cmd, options in tests:
                options = options or {}
                if matches(self._configured_run_alone_tests, cmd):
                    run_alone.append((cmd, options))
                else:
                    self._spawn(pool, cmd, options)
            pool.close()
            pool.join()

            if run_alone:
                util.log("Running tests marked standalone")
                for cmd, options in run_alone:
                    self._run_one(cmd, **options)
        except KeyboardInterrupt:
            try:
                util.log('Waiting for currently running to finish...')
                self._reap_all()
            except KeyboardInterrupt:
                pool.terminate()
                raise
        except:
            pool.terminate()
            raise
Example #13
def bench_compression_comparison(n_chunks, df_length, append_mul, pool_size, pool_step, repeats,
                                 use_raw_lz4, use_HC):
    _str = construct_test_data(df_length, append_mul)
    chunk_size = len(_str) / 1024 ** 2.0
    _strarr = [_str] * n_chunks

    # Single threaded
    # ---------------
    measurements = bench_single(repeats, _strarr, use_HC)
    print_results(1, chunk_size, n_chunks, chunk_size*n_chunks, measurements)
    single_mean = np.mean(measurements)

    # Multi-threaded
    # --------------
    for sz in range(2, pool_size + 1, pool_step):
        if use_raw_lz4:
            pool = ThreadPool(sz)
        else:
            pool = None
            c.set_compression_pool_size(sz)
        measurements = bench_multi(repeats, _strarr, use_HC, pool=pool)
        print_results(sz, chunk_size, n_chunks, chunk_size * n_chunks, measurements, compare=single_mean)
        if pool:
            pool.close()
            pool.join()
    print("")
Example #14
def thread(host, port, threads, num):
    pool = ThreadPool(threads)
    for _ in range(num):
        pool.apply_async(job, (host, port))
        time.sleep(0.001)
    pool.close()
    pool.join()
Example #15
  def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified VersionedTargetSets.

    Returns a list of the ones that were satisfied from the cache. These don't require building.
    """
    if not vts:
      return [], []

    cached_vts = []
    uncached_vts = OrderedSet(vts)
    if self._artifact_cache and self.context.options.read_from_artifact_cache:
      pool = ThreadPool(processes=6)
      res = pool.map(lambda vt: self._artifact_cache.use_cached_files(vt.cache_key),
                     vts, chunksize=1)
      pool.close()
      pool.join()
      for vt, was_in_cache in zip(vts, res):
        if was_in_cache:
          cached_vts.append(vt)
          uncached_vts.discard(vt)
          self.context.log.info('Using cached artifacts for %s' % vt.targets)
          vt.update()
        else:
          self.context.log.info('No cached artifacts for %s' % vt.targets)
    return cached_vts, list(uncached_vts)
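
The cache check above is essentially a parallel predicate over a list; a generic sketch of that shape (names are illustrative, not from the original codebase):

from multiprocessing.pool import ThreadPool

def partition_by_predicate(items, predicate, threads=6):
    """Evaluate predicate(item) in a thread pool and split items into (hits, misses)."""
    pool = ThreadPool(processes=threads)
    try:
        flags = pool.map(predicate, items, chunksize=1)
    finally:
        pool.close()
        pool.join()
    hits = [item for item, flag in zip(items, flags) if flag]
    misses = [item for item, flag in zip(items, flags) if not flag]
    return hits, misses
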
    def run(self, suites):
        wrapper = self.config.plugins.prepareTest(suites)
        if wrapper is not None:
            suites = wrapper

        wrapped = self.config.plugins.setOutputStream(self.stream)
        if wrapped is not None:
            self.stream = wrapped

        result = self._makeResult()

        size = self.config.options.thread_pool
        if size < 0:
            size = cpu_count()

        pool = ThreadPool(size)

        with measure_time(result):

            for suite in suites:
                pool.apply_async(suite, args=(result,))

            pool.close()
            pool.join()

        self.config.plugins.finalize(result)
        return result
	def downloadPDFs(self):
		### Download all the files extracted from the metadata
		startTime = time.strftime("%c")
		# Loop through the CSV
		f = open(self.csvpath)
		metadata = csv.reader(f, quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True)
		
		for row in metadata:
			pmcid = row[8]
			
			### Check the input is a PMC ID
			if 'PMC' in pmcid:
				print('Starting thread for: '+pmcid)
				
				pool = Pool(30)
				pool.apply_async(self.saveFile, (pmcid,))
				pool.close()
				pool.join()
			else:
				print('Something is wrong. '+pmcid+' is not a PMC id')
				sys.exit(0)
			
		f.close()
		
		print('Finished downloading all files: start {} end {}.'.format(startTime, time.strftime("%c")))
def ons_resolver(key):

    def check_server(server):

        try:
            namecoind = NamecoindServer(server, NAMECOIND_PORT, NAMECOIND_USER, NAMECOIND_PASSWD)
            return_data = namecoind.get_full_profile('u/' + key)
            return return_data
        except:
            return error_reply("Couldn't connect to namecoind")


    pool = ThreadPool(len(ONS_SERVERS))

    replies = pool.map(check_server, ONS_SERVERS)
    pool.close()
    pool.join() 

    data_hashes = []
    for reply in replies:
        data_hashes.append(hashlib.md5(json.dumps(reply).encode()).hexdigest())

    count = Counter(data_hashes)
    max_repeated_times = count.most_common()[0][1]
    
    if max_repeated_times >= (SERVER_CONFIRMATION_PERCENTAGE/100.0) * len(ONS_SERVERS):
        return replies[0]
    else:
        return error_reply("Data from different ONS servers doens't match")
Example #19
def run_tidy(sha="HEAD", is_rev_range=False):
    diff_cmdline = ["git", "diff" if is_rev_range else "show", sha]

    # Figure out which paths changed in the given diff.
    changed_paths = subprocess.check_output(diff_cmdline + ["--name-only", "--pretty=format:"]).splitlines()
    changed_paths = [p for p in changed_paths if p]

    # Produce a separate diff for each file and run clang-tidy-diff on it
    # in parallel.
    def tidy_on_path(path):
        patch_file = tempfile.NamedTemporaryFile()
        cmd = diff_cmdline + [
            "--src-prefix=%s/" % ROOT,
            "--dst-prefix=%s/" % ROOT,
            "--",
            path]
        subprocess.check_call(cmd, stdout=patch_file, cwd=ROOT)
        cmdline = [CLANG_TIDY_DIFF,
                   "-clang-tidy-binary", CLANG_TIDY,
                   "-p0",
                   "--",
                   "-DCLANG_TIDY"] + compile_flags.get_flags()
        return subprocess.check_output(
            cmdline,
            stdin=open(patch_file.name),
            cwd=ROOT)
    pool = ThreadPool(multiprocessing.cpu_count())
    try:
        return "".join(pool.imap(tidy_on_path, changed_paths))
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        pool.terminate()
        pool.join()
def main():
    pool = ThreadPool(10)
    base_url = 'https://www.google.com/?gws_rd=ssl#q='
    urls = [base_url + str(i) for i in range(1000)]
    pool.map(google_search, urls)
    pool.close()
    pool.join()
	def poll_all(self, recipient_infos):
		# Recipient_info entries are of form: (player, type, body)
		results = dict()
		threads = dict()

		# For each recipient, make an asynchronous process to handle their response
		num_reqs = len(recipient_infos)
		pool = ThreadPool(processes=num_reqs)
		for info in recipient_infos:
			# Unpack poll() args
			receiver = info[0]
			rq_type  = info[1]
			body     = info[2]

			# Run each poll on a separate thread
			threads[receiver] = pool.apply_async(self.poll, (receiver, rq_type, body,))

		# Get the results, store them in a dict
		# Seems like it defeats the purpose of polling asynchronously, but it doesn't (brain teaser?)
		for info in recipient_infos:
			receiver = info[0]
			try:
				results[receiver] = threads[receiver].get(timeout=self.timeout)
			except Exception as e:
				self.log_error(e)
				results[receiver] = None # Worry about this later

		# Clean up those threads
		pool.close()
		pool.join()

		# Return the dict
		return results
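
A condensed sketch of the poll_all pattern above: start every poll at once with apply_async, then collect each answer with a per-recipient timeout so one slow responder cannot block the rest (illustrative names):

from multiprocessing.pool import ThreadPool

def poll_many(poll_fn, targets, timeout=5):
    """Fire poll_fn(target) for every target concurrently, then gather results with a timeout each."""
    pool = ThreadPool(processes=max(len(targets), 1))
    pending = {target: pool.apply_async(poll_fn, (target,)) for target in targets}

    results = {}
    for target, async_result in pending.items():
        try:
            results[target] = async_result.get(timeout=timeout)
        except Exception:
            results[target] = None   # timed out or raised: record the miss and move on

    pool.close()
    pool.join()
    return results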
Example #22
        class parallel_map(collections.Iterable):

            def __init__(self, pool_size, function, *iterables):
                if not isinstance(pool_size, numbers.Integral):
                    raise TypeError('pool_size must be an integer, not ' +
                                    repr(pool_size))
                elif not callable(function):
                    raise TypeError('function must be callable, not ' +
                                    repr(function))
                elif not iterables:
                    raise TypeError('missing iterable')
                self.pool = ThreadPool(pool_size)
                self.function = function
                self.results = self.pool.imap_unordered(self.map_function,
                                                        zip(*iterables))

            def map_function(self, args):
                try:
                    value = self.function(*args)
                except Exception:
                    return False, sys.exc_info()
                return True, value

            def __iter__(self):
                errors = []
                for success, value in self.results:
                    if success:
                        yield value
                    else:
                        errors.append(value)
                self.pool.close()
                self.pool.join()
                for error in errors:
                    exec('raise error[1], None, error[2]')
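
The exec('raise error[1], None, error[2]') line above is Python 2 re-raise syntax wrapped in exec so the module still parses under Python 3. A sketch of the Python 3 equivalent, assuming the stored value is a sys.exc_info() triple as collected in map_function:

def reraise(exc_info):
    """Re-raise a deferred exception with its original traceback (Python 3)."""
    exc_type, exc_value, exc_tb = exc_info
    raise exc_value.with_traceback(exc_tb)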
Example #23
    def _send_some_brokers(self, requests, ignore_errors=True):
        """
        Sends a request to one or more brokers. The responses are returned mapped to the broker that
        they were retrieved from. This method uses a thread pool to parallelize sends.

        Args:
            requests (int -> BaseRequest): A dictionary, where keys are integer broker IDs and the values are valid
                request objects that inherit from BaseRequest.

        Returns:
            dict (int -> BaseResponse): A map of broker IDs to response instances (inherited from
                BaseResponse). Failed requests are represented with a value of None
        """
        results = {}
        pool = ThreadPool(processes=self.configuration.broker_threads)
        for broker_id in requests:
            results[broker_id] = pool.apply_async(self._send_to_broker, (broker_id, requests[broker_id]))
        pool.close()
        pool.join()

        responses = {}
        for broker_id in results:
            try:
                responses[broker_id] = results[broker_id].get()
            except ConnectionError:
                if ignore_errors:
                    # Individual broker failures are OK, as we'll represent them with a None value
                    responses[broker_id] = None
                else:
                    raise
        return responses
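
A generic sketch of the broker fan-out above: submit one apply_async per key, join the pool, then resolve each AsyncResult, mapping failures to None when errors are tolerated (illustrative helper, not part of the original client):

from multiprocessing.pool import ThreadPool

def send_all(handler, requests_by_id, threads=4, ignore_errors=True):
    """Call handler(key, request) concurrently and map each key to its response (or None on failure)."""
    pool = ThreadPool(processes=threads)
    pending = {key: pool.apply_async(handler, (key, request))
               for key, request in requests_by_id.items()}
    pool.close()
    pool.join()

    responses = {}
    for key, result in pending.items():
        try:
            responses[key] = result.get()
        except Exception:
            if not ignore_errors:
                raise
            responses[key] = None   # an individual failure is represented with None
    return responses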
Example #24
def main():
    # Run the Tales 
    pool = ThreadPool(processes=int(tcfg['Workers'].get('pool_size', 10)))
    pool.map(worker, tales)
    pool.close()
    pool.join()
Example #25
    def run(self):
        pool = ThreadPool(self.num_agents)
        for idx in range(self.num_agents):
            pool.apply_async(self.run_experiement, args=(self.experiment, idx))

        pool.close()
        pool.join()
def main():
    good_proxys = []
    socket.setdefaulttimeout(10)
    with open('proxylist.txt') as f:
        proxy_list = f.readlines()

    total = len(proxy_list)
    pool = ThreadPool(multiprocessing.cpu_count() * 2 + 1)
    async_results = []
    for index, proxy in enumerate(proxy_list):
        if proxy.startswith('http://'):
            curr_proxy = proxy[7:].strip()
        else:
            curr_proxy = proxy.strip()

        async_results.append(pool.apply_async(
            check_proxy,
            args=(curr_proxy, index, total)
        ))
    pool.close()
    pool.join()

    for result in async_results:
        proxy = result.get()
        if proxy:
            good_proxys.append(proxy)

    if not good_proxys:
        print('No proxies are working!')
        return

    with open('proxy.txt', 'w') as f:
        for proxy in good_proxys:
            f.write(proxy + '\n')
    def worker(self, db, lista):
        '''
        Method to run the plugin search process using multiple threads
        Multithread method for online search
        '''

        # Make the Pool of workers
        processes = 5
        # WARNING: with a fiber connection up to 20 workers run without errors; on ADSL at most 4!
        pool = Pool(processes)

        # Open the urls in their own threads and return the results
        pluglist = pool.map(onlinePluginSearch, lista)

        #close the pool and wait for the work to finish
        pool.close()
        pool.join()

        # parse the result (a list of tuples), put everything into a string (result) and update the cache
        result = ''
        for item in pluglist:
            if item[1] !=[]:
                for plug in item[1]:
                    db.updateCache(item[0], plug)
                    result = result + str(plug) + ','

        numbers = result.count(',') + 1
        print("Number of available pflugins: %s" % numbers)
        print("Adding to policy plugins: 19506,10287,12634 for credential checks and ping target.")
        result = result + "19506,10287,12634"
        #aggiungo sempre questi 3 plug-in per verificare se il target e' alive

        return result
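
A trimmed sketch of the worker method above, assuming the search function returns (key, hits) tuples: map it over the input list with a small thread pool and flatten the hits into one comma-separated string.

from multiprocessing.pool import ThreadPool as Pool

def search_all(search_fn, items, workers=5):
    """Run search_fn over items with a small thread pool and flatten the (key, hits) results."""
    pool = Pool(workers)
    try:
        results = pool.map(search_fn, items)
    finally:
        pool.close()
        pool.join()
    return ','.join(str(hit) for _, hits in results for hit in hits)
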
    def _listArtifacts(self, urls, gavs):
        """
        Loads maven artifacts from list of GAVs and tries to locate the artifacts in one of the
        specified repositories.

        :param urls: repository URLs where the given GAVs can be located
        :param gavs: List of GAVs
        :returns: Dictionary where index is MavenArtifact object and value is its repo root URL.
        """
        def findArtifact(gav, urls, artifacts):
            artifact = MavenArtifact.createFromGAV(gav)
            for url in urls:
                if maven_repo_util.gavExists(url, artifact):
                    #Critical section?
                    artifacts[artifact] = ArtifactSpec(url)
                    return

            logging.warning('Artifact %s not found in any url!', artifact)

        artifacts = {}
        pool = ThreadPool(maven_repo_util.MAX_THREADS)
        for gav in gavs:
            pool.apply_async(findArtifact, [gav, urls, artifacts])

        # Close the pool and wait for the workers to finish
        pool.close()
        pool.join()

        return artifacts
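
Because ThreadPool workers share the parent process's memory, the apply_async workers above can write straight into the artifacts dict; a sketch of that shape with an explicit lock for the "critical section" noted in the comment (illustrative names):

from multiprocessing.pool import ThreadPool
from threading import Lock

def locate_all(keys, locate_one, threads=8):
    """Call locate_one(key) concurrently; collect non-None results into a shared dict."""
    found = {}
    lock = Lock()

    def worker(key):
        value = locate_one(key)
        if value is not None:
            with lock:                 # single dict writes are atomic in CPython, but be explicit
                found[key] = value

    pool = ThreadPool(processes=threads)
    try:
        pool.map(worker, keys)
    finally:
        pool.close()
        pool.join()
    return found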
Example #29
 def run(self, max_number_of_live_tokens=None, group=None):
     group = Pool()
     try:
         stages = []
         
         in_q = _DummyQueue()
         end_in = Event()
         if self._filters[0].is_serial:
             serial = Lock()
         else:
             serial = _DummyLock()
         if self._filters[0].is_ordered:
             out_q = PriorityQueue()
         else:
             out_q = Queue()
         
         
         for i, f in enumerate(self._filters):
             pass
                 
         send_q, recv_q = Queue(), Queue()
         
         group.close()
     except:
         group.terminate()
     finally:
         group.join()
    def cleanup(self, odps):
        cleaned = []

        def cleaner_thread(obj):
            try:
                obj.drop(odps)
                cleaned.append(obj)
            except:
                pass

        pool = ThreadPool(CLEANER_THREADS)
        if self._container:
            pool.map(cleaner_thread, self._container)
            pool.close()
            pool.join()
        for obj in cleaned:
            if obj in self._container:
                self._container.remove(obj)
        if not self._container:
            try:
                os.unlink(self._file_name)
            except OSError:
                pass
        else:
            self.dump()
Example #31
    def start_pool(self):
        pool = Pool(self.p)
        pool.map(self.func, self.arr)
        pool.close()
        pool.join()
Example #32
    code = emote['regex']
    number = emote['images'][0]['emoticon_set']

    try:
        parentPath = './emotes/' + str(number)
        if not os.path.exists(parentPath):
            os.makedirs(parentPath)
        filePath = './emotes/' + str(number) + '/' + str(code) + '.png'
        if not os.path.exists(filePath):
            if (printme):
                print('Downloading: ' + str(code) + ' in ... ' + filePath)
            urllib.request.urlretrieve(emote['images'][0]['url'], filePath)
            count += 1
        else:
            if (printme):
                print('skipped')

    except Exception as e:
        print(e)


for emote in emotes['emoticons']:
    pool.apply_async(my_op, (emote, ))

pool.close()
pool.join()

end = time.time()
print('Downloaded ' + str(count) + ' new files')
print('Running time: ' + str(end - start))
Example #33
    def prepare_connectivity(self, reservation, cloud_provider_model,
                             storage_client, resource_client, network_client,
                             logger, actions, cancellation_context):
        """
        :param logging.Logger logger:
        :param actions: list[cloudshell.cp.core.models.RequestActionBase]
        :param network_client:
        :param storage_client:
        :param resource_client:
        :param cloudshell.cp.azure.models.reservation_model.ReservationModel reservation:
        :param cloudshell.cp.azure.models.azure_cloud_provider_resource_model.AzureCloudProviderResourceModel cloud_provider_model: cloud provider
        :param cloudshell.shell.core.driver_context.CancellationContext cancellation_context: cancellation context instance
        :return:
        """
        cidr = self._validate_request_and_extract_cidr(actions)
        logger.info("Received CIDR {0} from server".format(cidr))

        reservation_id = reservation.reservation_id
        group_name = str(reservation_id)
        subnet_name = group_name
        tags = self.tags_service.get_tags(reservation=reservation)
        network_action_result = PrepareCloudInfraResult()

        # 1. Create a resource group
        logger.info("Creating a resource group: {0} .".format(group_name))
        self.vm_service.create_resource_group(
            resource_management_client=resource_client,
            group_name=group_name,
            region=cloud_provider_model.region,
            tags=tags)

        self.cancellation_service.check_if_cancelled(cancellation_context)
        storage_account_name = self._prepare_storage_account_name(
            reservation_id)

        # 2+3. create storage account and keypairs (async)
        pool = ThreadPool()
        storage_res = pool.apply_async(
            self._create_storage_and_keypairs,
            (logger, storage_client, storage_account_name, group_name,
             cloud_provider_model, tags, cancellation_context,
             network_action_result))

        logger.info(
            "Retrieving MGMT vNet from resource group {} by tag {}={}".format(
                cloud_provider_model.management_group_name,
                NetworkService.NETWORK_TYPE_TAG_NAME,
                NetworkService.MGMT_NETWORK_TAG_VALUE))

        virtual_networks = self.network_service.get_virtual_networks(
            network_client=network_client,
            group_name=cloud_provider_model.management_group_name)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        management_vnet = self.network_service.get_virtual_network_by_tag(
            virtual_networks=virtual_networks,
            tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
            tag_value=NetworkService.MGMT_NETWORK_TAG_VALUE)

        self._validate_management_vnet(management_vnet)

        logger.info(
            "Retrieving sandbox vNet from resource group {} by tag {}={}".
            format(cloud_provider_model.management_group_name,
                   NetworkService.NETWORK_TYPE_TAG_NAME,
                   NetworkService.SANDBOX_NETWORK_TAG_VALUE))

        sandbox_vnet = self.network_service.get_virtual_network_by_tag(
            virtual_networks=virtual_networks,
            tag_key=NetworkService.NETWORK_TYPE_TAG_NAME,
            tag_value=NetworkService.SANDBOX_NETWORK_TAG_VALUE)

        self._validate_sandbox_vnet(sandbox_vnet)

        # 4. Create the NSG object
        security_group_name = reservation_id
        logger.info("Creating a network security group '{}' .".format(
            security_group_name))
        network_security_group = self.security_group_service.create_network_security_group(
            network_client=network_client,
            group_name=group_name,
            security_group_name=security_group_name,
            region=cloud_provider_model.region,
            tags=tags)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        logger.info("Creating NSG management rules...")
        # 5. Set rules on NSG to create a sandbox
        self._create_management_rules(
            group_name=group_name,
            management_vnet=management_vnet,
            network_client=network_client,
            sandbox_vnet_cidr=cidr,
            security_group_name=security_group_name,
            additional_mgmt_networks=cloud_provider_model.
            additional_mgmt_networks,
            logger=logger)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        # 6. Create a subnet with NSG
        self._create_subnet(cidr=cidr,
                            cloud_provider_model=cloud_provider_model,
                            logger=logger,
                            network_client=network_client,
                            resource_client=resource_client,
                            network_security_group=network_security_group,
                            sandbox_vnet=sandbox_vnet,
                            subnet_name=subnet_name)

        self.cancellation_service.check_if_cancelled(cancellation_context)

        # wait for all async operations
        pool.close()
        pool.join()
        storage_res.get(
            timeout=900
        )  # will wait for 15 min and raise exception if storage account creation failed

        return self._prepare_results(network_action_result, actions)
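
The storage/keypair step above follows a fire-and-continue shape: start the slow call with apply_async, keep doing the remaining sequential work, and only block on the result at the end with a timeout. A minimal sketch of that shape (hypothetical names):

from multiprocessing.pool import ThreadPool

def run_with_background_task(slow_task, foreground_steps, timeout=900):
    """Start slow_task asynchronously, run the foreground steps, then wait for the background result."""
    pool = ThreadPool()
    background = pool.apply_async(slow_task)

    for step in foreground_steps:
        step()                      # sequential work that does not depend on slow_task

    pool.close()
    pool.join()
    # get() re-raises any exception from slow_task; the timeout mirrors the original's safety net
    return background.get(timeout=timeout)
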
class ApiClient(object):
    """
    Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param host: The base path for the server to call.
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to the API.
    """

    PRIMITIVE_TYPES = (float, bool, bytes, text_type) + integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if PY3 else long,
        'float': float,
        'str': str,
        'bool': bool,
        'date': date,
        'datetime': datetime,
        'object': object,
    }

    def __init__(self,
                 configuration=None,
                 header_name=None,
                 header_value=None,
                 cookie=None):
        if configuration is None:
            configuration = Configuration()
        self.configuration = configuration

        self.pool = ThreadPool()
        self.rest_client = RESTClientObject(configuration)
        self.default_headers = {}
        if header_name is not None:
            self.default_headers[header_name] = header_value
        self.cookie = cookie
        # Set default User-Agent.
        self.user_agent = 'Swagger-Codegen/1.0.0/python'

    def __del__(self):
        self.pool.close()
        self.pool.join()

    @property
    def user_agent(self):
        """
        Gets user agent.
        """
        return self.default_headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        """
        Sets user agent.
        """
        self.default_headers['User-Agent'] = value

    def set_default_header(self, header_name, header_value):
        self.default_headers[header_name] = header_value

    def __call_api(self,
                   resource_path,
                   method,
                   path_params=None,
                   query_params=None,
                   header_params=None,
                   body=None,
                   post_params=None,
                   files=None,
                   response_type=None,
                   auth_settings=None,
                   _return_http_data_only=None,
                   collection_formats=None,
                   _preload_content=True,
                   _request_timeout=None):

        config = self.configuration

        # header parameters
        header_params = header_params or {}
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param))

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method,
                                     url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params,
                                     body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """
        Builds a JSON POST object.

        If obj is None, return None.
        If obj is str, int, long, float, bool, return directly.
        If obj is datetime.datetime, datetime.date
            convert to string in iso8601 format.
        If obj is list, sanitize each element in the list.
        If obj is dict, return the dict.
        If obj is swagger model, return the properties dict.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        elif isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        elif isinstance(obj, list):
            return [
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj
            ]
        elif isinstance(obj, tuple):
            return tuple(
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj)
        elif isinstance(obj, (datetime, date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            obj_dict = obj
        else:
            # Convert model obj to dict except
            # attributes `swagger_types`, `attribute_map`
            # and attributes which value is not None.
            # Convert attribute name to json key in
            # model definition for request.
            obj_dict = {
                obj.attribute_map[attr]: getattr(obj, attr)
                for attr, _ in iteritems(obj.swagger_types)
                if getattr(obj, attr) is not None
            }

        return {
            key: self.sanitize_for_serialization(val)
            for key, val in iteritems(obj_dict)
        }
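
    # A worked example of the sanitization rules above (a sketch, not generated code):
    #   client.sanitize_for_serialization({"when": date(2020, 1, 2), "tags": ["a", None]})
    #   -> {"when": "2020-01-02", "tags": ["a", None]}
    # dates collapse to ISO 8601 strings, lists are sanitized element by element,
    # and plain values pass through unchanged.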

    def deserialize(self, response, response_type):
        """
        Deserializes response into an object.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        # handle file downloading
        # save response body into a tmp file and return the instance
        if response_type == "file":
            return self.__deserialize_file(response)

        # fetch data from response object
        try:
            data = json.loads(response.data)
        except ValueError:
            data = response.data

        return self.__deserialize(data, response_type)

    def __deserialize(self, data, klass):
        """
        Deserializes dict, list, str into an object.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if type(klass) == str:
            if klass.startswith('list['):
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == date:
            return self.__deserialize_date(data)
        elif klass == datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)

    def call_api(self,
                 resource_path,
                 method,
                 path_params=None,
                 query_params=None,
                 header_params=None,
                 body=None,
                 post_params=None,
                 files=None,
                 response_type=None,
                 auth_settings=None,
                 is_async=None,  # renamed from 'async', a reserved word since Python 3.7
                 _return_http_data_only=None,
                 collection_formats=None,
                 _preload_content=True,
                 _request_timeout=None):
Example #35
    def __get_latest_routemanagers(self) -> Optional[Dict[str, dict]]:
        global mode_mapping
        areas: Optional[Dict[str, dict]] = {}

        if self.__configmode:
            return areas

        raw_areas = self.__data_manager.get_root_resource('area')

        thread_pool = ThreadPool(processes=4)

        areas_procs = {}
        for area_id, area_true in raw_areas.items():
            area = area_true.get_resource()
            if area["geofence_included"] is None:
                raise RuntimeError("Cannot work without geofence_included")

            try:
                geofence_included = self.__data_manager.get_resource(
                    'geofence', identifier=area["geofence_included"])
            except Exception:
                raise RuntimeError(
                    "geofence_included for area '{}' is specified but does not exist ('{}')."
                    .format(area["name"], area["geofence_included"]))

            geofence_excluded_raw_path = area.get("geofence_excluded", None)
            try:
                if geofence_excluded_raw_path is not None:
                    geofence_excluded = self.__data_manager.get_resource(
                        'geofence', identifier=geofence_excluded_raw_path)
                else:
                    geofence_excluded = None
            except Exception:
                raise RuntimeError(
                    "geofence_excluded for area '{}' is specified but file does not exist ('{}')."
                    .format(area["name"], geofence_excluded_raw_path))

            area_dict = {
                "mode": area_true.area_type,
                "geofence_included": geofence_included,
                "geofence_excluded": geofence_excluded,
                "routecalc": area["routecalc"],
                "name": area['name']
            }
            # also build a routemanager for each area...

            # grab coords
            # first check if init is false, if so, grab the coords from DB
            geofence_helper = GeofenceHelper(geofence_included,
                                             geofence_excluded)
            mode = area_true.area_type
            # build routemanagers

            # map iv list to ids
            if area.get('settings',
                        None) is not None and 'mon_ids_iv' in area['settings']:
                # replace list name
                area['settings']['mon_ids_iv_raw'] = \
                    self.get_monlist(area['settings'].get('mon_ids_iv', None), area.get("name", "unknown"))
            route_resource = self.__data_manager.get_resource(
                'routecalc', identifier=area["routecalc"])

            calc_type: str = area.get("route_calc_algorithm", "route")
            route_manager = RouteManagerFactory.get_routemanager(
                self.__db_wrapper,
                self.__data_manager,
                area_id,
                None,
                mode_mapping.get(mode, {}).get("range", 0),
                mode_mapping.get(mode, {}).get("max_count", 99999999),
                geofence_included,
                path_to_exclude_geofence=geofence_excluded,
                mode=mode,
                settings=area.get("settings", None),
                init=area.get("init", False),
                name=area.get("name", "unknown"),
                level=area.get("level", False),
                coords_spawns_known=area.get("coords_spawns_known", False),
                routefile=route_resource,
                calctype=calc_type,
                joinqueue=self.join_routes_queue,
                s2_level=mode_mapping.get(mode, {}).get("s2_cell_level", 30),
                include_event_id=area.get("settings",
                                          {}).get("include_event_id", None))
            logger.info("Initializing area {}", area["name"])
            if mode not in ("iv_mitm", "idle") and calc_type != "routefree":
                coords = self.__fetch_coords(
                    mode,
                    geofence_helper,
                    coords_spawns_known=area.get("coords_spawns_known", False),
                    init=area.get("init", False),
                    range_init=mode_mapping.get(mode,
                                                {}).get("range_init", 630),
                    including_stops=area.get("including_stops", False),
                    include_event_id=area.get("settings",
                                              {}).get("include_event_id",
                                                      None))

                route_manager.add_coords_list(coords)
                max_radius = mode_mapping[mode]["range"]
                max_count_in_radius = mode_mapping[mode]["max_count"]
                if not area.get("init", False):

                    proc = thread_pool.apply_async(
                        route_manager.initial_calculation,
                        args=(max_radius, max_count_in_radius, 0, False))
                    areas_procs[area_id] = proc
                else:
                    logger.info("Init mode enabled. Going row-based for {}",
                                area.get("name", "unknown"))
                    # we are in init, let's write the init route to file to make it visible in madmin
                    calc_coords = []
                    if area["routecalc"] is not None:
                        for loc in coords:
                            calc_coord = '%s,%s' % (str(loc.lat), str(loc.lng))
                            calc_coords.append(calc_coord)
                        route_resource['routefile'] = calc_coords
                        route_resource.save()
                    # gotta feed the route to routemanager... TODO: without recalc...
                    proc = thread_pool.apply_async(route_manager.recalc_route,
                                                   args=(1, 99999999, 0,
                                                         False))
                    areas_procs[area_id] = proc

            area_dict["routemanager"] = route_manager
            areas[area_id] = area_dict

        for area in areas_procs.keys():
            to_be_checked = areas_procs[area]
            to_be_checked.get()

        thread_pool.close()
        thread_pool.join()
        return areas
Example #36
class ApiClient(object):
    """Generic API client for Swagger client library builds.

    Swagger generic API client. This client handles the client-
    server communication, and is invariant across implementations. Specifics of
    the methods and models for each application are generated from the Swagger
    templates.

    NOTE: This class is auto generated by the swagger code generator program.
    Ref: https://github.com/swagger-api/swagger-codegen
    Do not edit the class manually.

    :param configuration: .Configuration object for this client
    :param header_name: a header to pass when making calls to the API.
    :param header_value: a header value to pass when making calls to
        the API.
    :param cookie: a cookie to include in the header when making calls
        to the API
    """

    PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types
    NATIVE_TYPES_MAPPING = {
        'int': int,
        'long': int if six.PY3 else long,  # noqa: F821
        'float': float,
        'str': str,
        'bool': bool,
        'date': datetime.date,
        'datetime': datetime.datetime,
        'object': object,
    }

    def __init__(self,
                 configuration=None,
                 header_name=None,
                 header_value=None,
                 cookie=None):
        if configuration is None:
            configuration = Configuration()
        self.configuration = configuration

        self.pool = ThreadPool()
        self.rest_client = rest.RESTClientObject(configuration)
        self.default_headers = {}
        if header_name is not None:
            self.default_headers[header_name] = header_value
        self.cookie = cookie
        # Set default User-Agent.
        self.user_agent = 'Swagger-Codegen/1.1.0/python'

    def __del__(self):
        self.pool.close()
        self.pool.join()

    @property
    def user_agent(self):
        """User agent for this API client"""
        return self.default_headers['User-Agent']

    @user_agent.setter
    def user_agent(self, value):
        self.default_headers['User-Agent'] = value

    def set_default_header(self, header_name, header_value):
        self.default_headers[header_name] = header_value

    def __call_api(self,
                   resource_path,
                   method,
                   path_params=None,
                   query_params=None,
                   header_params=None,
                   body=None,
                   post_params=None,
                   files=None,
                   response_type=None,
                   auth_settings=None,
                   _return_http_data_only=None,
                   collection_formats=None,
                   _preload_content=True,
                   _request_timeout=None):

        config = self.configuration

        # header parameters
        header_params = header_params or {}
        header_params.update(self.default_headers)
        if self.cookie:
            header_params['Cookie'] = self.cookie
        if header_params:
            header_params = self.sanitize_for_serialization(header_params)
            header_params = dict(
                self.parameters_to_tuples(header_params, collection_formats))

        # path parameters
        if path_params:
            path_params = self.sanitize_for_serialization(path_params)
            path_params = self.parameters_to_tuples(path_params,
                                                    collection_formats)
            for k, v in path_params:
                # specified safe chars, encode everything
                resource_path = resource_path.replace(
                    '{%s}' % k,
                    quote(str(v), safe=config.safe_chars_for_path_param))

        # query parameters
        if query_params:
            query_params = self.sanitize_for_serialization(query_params)
            query_params = self.parameters_to_tuples(query_params,
                                                     collection_formats)

        # post parameters
        if post_params or files:
            post_params = self.prepare_post_parameters(post_params, files)
            post_params = self.sanitize_for_serialization(post_params)
            post_params = self.parameters_to_tuples(post_params,
                                                    collection_formats)

        # auth setting
        self.update_params_for_auth(header_params, query_params, auth_settings)

        # body
        if body:
            body = self.sanitize_for_serialization(body)

        # request url
        url = self.configuration.host + resource_path

        # perform request and return response
        response_data = self.request(method,
                                     url,
                                     query_params=query_params,
                                     headers=header_params,
                                     post_params=post_params,
                                     body=body,
                                     _preload_content=_preload_content,
                                     _request_timeout=_request_timeout)

        self.last_response = response_data

        return_data = response_data
        if _preload_content:
            # deserialize response data
            if response_type:
                return_data = self.deserialize(response_data, response_type)
            else:
                return_data = None

        if _return_http_data_only:
            return (return_data)
        else:
            return (return_data, response_data.status,
                    response_data.getheaders())

    def sanitize_for_serialization(self, obj):
        """Builds a JSON POST object.

        If obj is None, return None.
        If obj is str, int, long, float, bool, return directly.
        If obj is datetime.datetime, datetime.date
            convert to string in iso8601 format.
        If obj is list, sanitize each element in the list.
        If obj is dict, return the dict.
        If obj is swagger model, return the properties dict.

        :param obj: The data to serialize.
        :return: The serialized form of data.
        """
        if obj is None:
            return None
        elif isinstance(obj, self.PRIMITIVE_TYPES):
            return obj
        elif isinstance(obj, list):
            return [
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj
            ]
        elif isinstance(obj, tuple):
            return tuple(
                self.sanitize_for_serialization(sub_obj) for sub_obj in obj)
        elif isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, dict):
            obj_dict = obj
        else:
            # Convert model obj to dict except
            # attributes `swagger_types`, `attribute_map`
            # and attributes which value is not None.
            # Convert attribute name to json key in
            # model definition for request.
            obj_dict = {
                obj.attribute_map[attr]: getattr(obj, attr)
                for attr, _ in six.iteritems(obj.swagger_types)
                if getattr(obj, attr) is not None
            }

        return {
            key: self.sanitize_for_serialization(val)
            for key, val in six.iteritems(obj_dict)
        }

    def deserialize(self, response, response_type):
        """Deserializes response into an object.

        :param response: RESTResponse object to be deserialized.
        :param response_type: class literal for
            deserialized object, or string of class name.

        :return: deserialized object.
        """
        # handle file downloading
        # save response body into a tmp file and return the instance
        if response_type == "file":
            return self.__deserialize_file(response)

        # fetch data from response object
        try:
            data = json.loads(response.data)
        except ValueError:
            data = response.data

        return self.__deserialize(data, response_type)

    def __deserialize(self, data, klass):
        """Deserializes dict, list, str into an object.

        :param data: dict, list or str.
        :param klass: class literal, or string of class name.

        :return: object.
        """
        if data is None:
            return None

        if type(klass) == str:
            if klass.startswith('list['):
                sub_kls = re.match(r'list\[(.*)\]', klass).group(1)
                return [
                    self.__deserialize(sub_data, sub_kls) for sub_data in data
                ]

            if klass.startswith('dict('):
                sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2)
                return {
                    k: self.__deserialize(v, sub_kls)
                    for k, v in six.iteritems(data)
                }

            # convert str to class
            if klass in self.NATIVE_TYPES_MAPPING:
                klass = self.NATIVE_TYPES_MAPPING[klass]
            else:
                klass = getattr(flagr.models, klass)

        if klass in self.PRIMITIVE_TYPES:
            return self.__deserialize_primitive(data, klass)
        elif klass == object:
            return self.__deserialize_object(data)
        elif klass == datetime.date:
            return self.__deserialize_date(data)
        elif klass == datetime.datetime:
            return self.__deserialize_datatime(data)
        else:
            return self.__deserialize_model(data, klass)
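
    # For reference, the string forms accepted for `klass` above look like this
    # (a sketch; `Flag` stands for any generated model class in flagr.models):
    #
    #   self.__deserialize([1, '2'], 'list[int]')         # -> [1, 2]
    #   self.__deserialize({'a': '1'}, 'dict(str, int)')  # -> {'a': 1}
    #   self.__deserialize({...}, 'Flag')                 # -> a flagr.models.Flag instance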

    def call_api(self,
                 resource_path,
                 method,
                 path_params=None,
                 query_params=None,
                 header_params=None,
                 body=None,
                 post_params=None,
                 files=None,
                 response_type=None,
                 auth_settings=None,
                 is_async=None,
                 _return_http_data_only=None,
                 collection_formats=None,
                 _preload_content=True,
                 _request_timeout=None):
        """Makes the HTTP request (synchronous) and returns deserialized data.

        To make an async request, set the is_async parameter.

        :param resource_path: Path to method endpoint.
        :param method: Method to call.
        :param path_params: Path parameters in the url.
        :param query_params: Query parameters in the url.
        :param header_params: Header parameters to be
            placed in the request header.
        :param body: Request body.
        :param post_params dict: Request post form parameters,
            for `application/x-www-form-urlencoded`, `multipart/form-data`.
        :param auth_settings list: Auth Settings names for the request.
        :param response_type: Response data type.
        :param files dict: key -> filename, value -> filepath,
            for `multipart/form-data`.
        :param is_async bool: execute request asynchronously
        :param _return_http_data_only: if True, return the response data only,
                                       without the HTTP status code and headers
        :param collection_formats: dict of collection formats for path, query,
            header, and post parameters.
        :param _preload_content: if False, the urllib3.HTTPResponse object will
                                 be returned without reading/decoding response
                                 data. Default is True.
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :return:
            If is_async parameter is True,
            the request will be called asynchronously.
            The method will return the request thread.
            If parameter is_async is False or missing,
            then the method will return the response directly.
        """
        if not is_async:
            return self.__call_api(resource_path, method, path_params,
                                   query_params, header_params, body,
                                   post_params, files, response_type,
                                   auth_settings, _return_http_data_only,
                                   collection_formats, _preload_content,
                                   _request_timeout)
        else:
            thread = self.pool.apply_async(
                self.__call_api,
                (resource_path, method, path_params, query_params,
                 header_params, body, post_params, files, response_type,
                 auth_settings, _return_http_data_only, collection_formats,
                 _preload_content, _request_timeout))
        return thread
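
    # A sketch of the two call styles, where `api` is an ApiClient instance and
    # the '/flags/{flagID}' path and 'Flag' response type are only illustrative:
    #
    #   data, status, headers = api.call_api(
    #       '/flags/{flagID}', 'GET',
    #       path_params={'flagID': 1}, response_type='Flag')      # synchronous
    #
    #   thread = api.call_api(
    #       '/flags/{flagID}', 'GET',
    #       path_params={'flagID': 1}, response_type='Flag',
    #       is_async=True)                # returns the AsyncResult from self.pool
    #   data, status, headers = thread.get()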

    def request(self,
                method,
                url,
                query_params=None,
                headers=None,
                post_params=None,
                body=None,
                _preload_content=True,
                _request_timeout=None):
        """Makes the HTTP request using RESTClient."""
        if method == "GET":
            return self.rest_client.GET(url,
                                        query_params=query_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        headers=headers)
        elif method == "HEAD":
            return self.rest_client.HEAD(url,
                                         query_params=query_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         headers=headers)
        elif method == "OPTIONS":
            return self.rest_client.OPTIONS(url,
                                            query_params=query_params,
                                            headers=headers,
                                            post_params=post_params,
                                            _preload_content=_preload_content,
                                            _request_timeout=_request_timeout,
                                            body=body)
        elif method == "POST":
            return self.rest_client.POST(url,
                                         query_params=query_params,
                                         headers=headers,
                                         post_params=post_params,
                                         _preload_content=_preload_content,
                                         _request_timeout=_request_timeout,
                                         body=body)
        elif method == "PUT":
            return self.rest_client.PUT(url,
                                        query_params=query_params,
                                        headers=headers,
                                        post_params=post_params,
                                        _preload_content=_preload_content,
                                        _request_timeout=_request_timeout,
                                        body=body)
        elif method == "PATCH":
            return self.rest_client.PATCH(url,
                                          query_params=query_params,
                                          headers=headers,
                                          post_params=post_params,
                                          _preload_content=_preload_content,
                                          _request_timeout=_request_timeout,
                                          body=body)
        elif method == "DELETE":
            return self.rest_client.DELETE(url,
                                           query_params=query_params,
                                           headers=headers,
                                           _preload_content=_preload_content,
                                           _request_timeout=_request_timeout,
                                           body=body)
        else:
            raise ValueError("http method must be `GET`, `HEAD`, `OPTIONS`,"
                             " `POST`, `PATCH`, `PUT` or `DELETE`.")

    def parameters_to_tuples(self, params, collection_formats):
        """Get parameters as list of tuples, formatting collections.

        :param params: Parameters as dict or list of two-tuples
        :param dict collection_formats: Parameter collection formats
        :return: Parameters as list of tuples, collections formatted
        """
        new_params = []
        if collection_formats is None:
            collection_formats = {}
        for k, v in six.iteritems(params) if isinstance(
                params, dict) else params:  # noqa: E501
            if k in collection_formats:
                collection_format = collection_formats[k]
                if collection_format == 'multi':
                    new_params.extend((k, value) for value in v)
                else:
                    if collection_format == 'ssv':
                        delimiter = ' '
                    elif collection_format == 'tsv':
                        delimiter = '\t'
                    elif collection_format == 'pipes':
                        delimiter = '|'
                    else:  # csv is the default
                        delimiter = ','
                    new_params.append(
                        (k, delimiter.join(str(value) for value in v)))
            else:
                new_params.append((k, v))
        return new_params
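
    # Examples of the collection formats handled above:
    #
    #   self.parameters_to_tuples({'ids': [1, 2, 3]}, {'ids': 'csv'})
    #   # -> [('ids', '1,2,3')]
    #   self.parameters_to_tuples({'ids': [1, 2, 3]}, {'ids': 'multi'})
    #   # -> [('ids', 1), ('ids', 2), ('ids', 3)]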

    def prepare_post_parameters(self, post_params=None, files=None):
        """Builds form parameters.

        :param post_params: Normal form parameters.
        :param files: File parameters.
        :return: Form parameters with files.
        """
        params = []

        if post_params:
            params = post_params

        if files:
            for k, v in six.iteritems(files):
                if not v:
                    continue
                file_names = v if type(v) is list else [v]
                for n in file_names:
                    with open(n, 'rb') as f:
                        filename = os.path.basename(f.name)
                        filedata = f.read()
                        mimetype = (mimetypes.guess_type(filename)[0]
                                    or 'application/octet-stream')
                        params.append(
                            tuple([k, tuple([filename, filedata, mimetype])]))

        return params

    def select_header_accept(self, accepts):
        """Returns `Accept` based on an array of accepts provided.

        :param accepts: List of headers.
        :return: Accept (e.g. application/json).
        """
        if not accepts:
            return

        accepts = [x.lower() for x in accepts]

        if 'application/json' in accepts:
            return 'application/json'
        else:
            return ', '.join(accepts)

    def select_header_content_type(self, content_types):
        """Returns `Content-Type` based on an array of content_types provided.

        :param content_types: List of content-types.
        :return: Content-Type (e.g. application/json).
        """
        if not content_types:
            return 'application/json'

        content_types = [x.lower() for x in content_types]

        if 'application/json' in content_types or '*/*' in content_types:
            return 'application/json'
        else:
            return content_types[0]

    def update_params_for_auth(self, headers, querys, auth_settings):
        """Updates header and query params based on authentication setting.

        :param headers: Header parameters dict to be updated.
        :param querys: Query parameters tuple list to be updated.
        :param auth_settings: Authentication setting identifiers list.
        """
        if not auth_settings:
            return

        for auth in auth_settings:
            auth_setting = self.configuration.auth_settings().get(auth)
            if auth_setting:
                if not auth_setting['value']:
                    continue
                elif auth_setting['in'] == 'header':
                    headers[auth_setting['key']] = auth_setting['value']
                elif auth_setting['in'] == 'query':
                    querys.append((auth_setting['key'], auth_setting['value']))
                else:
                    raise ValueError(
                        'Authentication token must be in `query` or `header`')

    def __deserialize_file(self, response):
        """Deserializes body to file

        Saves response body into a file in a temporary folder,
        using the filename from the `Content-Disposition` header if provided.

        :param response:  RESTResponse.
        :return: file path.
        """
        fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path)
        os.close(fd)
        os.remove(path)

        content_disposition = response.getheader("Content-Disposition")
        if content_disposition:
            filename = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?',
                                 content_disposition).group(1)
            path = os.path.join(os.path.dirname(path), filename)

        with open(path, "wb") as f:
            f.write(response.data)

        return path

    def __deserialize_primitive(self, data, klass):
        """Deserializes string to primitive type.

        :param data: str.
        :param klass: class literal.

        :return: int, long, float, str, bool.
        """
        try:
            return klass(data)
        except UnicodeEncodeError:
            return six.text_type(data)
        except TypeError:
            return data

    def __deserialize_object(self, value):
        """Return a original value.

        :return: object.
        """
        return value

    def __deserialize_date(self, string):
        """Deserializes string to date.

        :param string: str.
        :return: date.
        """
        try:
            from dateutil.parser import parse
            return parse(string).date()
        except ImportError:
            return string
        except ValueError:
            raise rest.ApiException(
                status=0,
                reason="Failed to parse `{0}` as date object".format(string))

    def __deserialize_datatime(self, string):
        """Deserializes string to datetime.

        The string should be in iso8601 datetime format.

        :param string: str.
        :return: datetime.
        """
        try:
            from dateutil.parser import parse
            return parse(string)
        except ImportError:
            return string
        except ValueError:
            raise rest.ApiException(
                status=0,
                reason=(
                    "Failed to parse `{0}` as datetime object".format(string)))

    def __deserialize_model(self, data, klass):
        """Deserializes list or dict to model.

        :param data: dict, list.
        :param klass: class literal.
        :return: model object.
        """

        if not klass.swagger_types and not hasattr(klass,
                                                   'get_real_child_model'):
            return data

        kwargs = {}
        if klass.swagger_types is not None:
            for attr, attr_type in six.iteritems(klass.swagger_types):
                if (data is not None and klass.attribute_map[attr] in data
                        and isinstance(data, (list, dict))):
                    value = data[klass.attribute_map[attr]]
                    kwargs[attr] = self.__deserialize(value, attr_type)

        instance = klass(**kwargs)

        if hasattr(instance, 'get_real_child_model'):
            klass_name = instance.get_real_child_model(data)
            if klass_name:
                instance = self.__deserialize(data, klass_name)
        return instance
def case_6():
    pool = ThreadPool(cpu_count())
    for _ in range(100):
        pool.apply_async(call_url)
    pool.close()
    pool.join()
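
case_6 above fires off its calls and only waits via close()/join(); the self-contained
sketch below keeps that shape, with call_url replaced by a harmless placeholder since
the real helper is not shown in this example:

import time
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool


def call_url():
    # placeholder for the call_url helper used by case_6 (not shown here)
    time.sleep(0.01)


def case_6_sketch():
    pool = ThreadPool(cpu_count())
    for _ in range(100):
        pool.apply_async(call_url)  # fire-and-forget: the AsyncResults are discarded
    pool.close()   # stop accepting new tasks
    pool.join()    # block until every queued task has run
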
Exemple #38
0
    def map(self,
            func,
            iterdata,
            extra_env=None,
            extra_meta=None,
            invoke_pool_threads=64,
            data_all_as_one=True,
            use_cached_runtime=True,
            overwrite_invoke_args=None,
            exclude_modules=[]):
        """
        # FIXME work with an actual iterable instead of just a list

        data_all_as_one : upload the data as a single object; fewer
        tcp transactions (good) but potentially higher latency for workers (bad)

        use_cached_runtime : if runtime has been cached, use that. When set
        to False, redownloads runtime.
        """

        data = list(iterdata)
        if not data:
            return []

        host_job_meta = {}

        pool = ThreadPool(invoke_pool_threads)
        callset_id = wrenutil.create_callset_id()

        ### pickle func and all data (to capture module dependencies)
        func_and_data_ser, mod_paths = self.serializer([func] + data)
        print mod_paths
        func_str = func_and_data_ser[0]
        data_strs = func_and_data_ser[1:]
        data_size_bytes = sum(len(x) for x in data_strs)
        agg_data_key = None
        host_job_meta['agg_data'] = False
        host_job_meta['data_size_bytes'] = data_size_bytes

        if data_size_bytes < wrenconfig.MAX_AGG_DATA_SIZE and data_all_as_one:
            agg_data_key = storage_utils.create_agg_data_key(
                self.storage.prefix, callset_id)
            agg_data_bytes, agg_data_ranges = self.agg_data(data_strs)
            agg_upload_time = time.time()
            self.storage.put_data(agg_data_key, agg_data_bytes)
            host_job_meta['agg_data'] = True
            host_job_meta['data_upload_time'] = time.time() - agg_upload_time
            host_job_meta['data_upload_timestamp'] = time.time()
        else:
            # FIXME add warning that you wanted data all as one but
            # it exceeded max data size
            pass

        for module in exclude_modules:
            for mod_path in list(mod_paths):
                if module in mod_path and mod_path in mod_paths:
                    mod_paths.remove(mod_path)

        module_data = create_mod_data(mod_paths)
        func_str_encoded = wrenutil.bytes_to_b64str(func_str)
        #debug_foo = {'func' : func_str_encoded,
        #             'module_data' : module_data}

        #pickle.dump(debug_foo, open("/tmp/py35.debug.pickle", 'wb'))
        ### Create func and upload
        func_module_str = json.dumps({
            'func': func_str_encoded,
            'module_data': module_data
        })
        host_job_meta['func_module_str_len'] = len(func_module_str)

        func_upload_time = time.time()
        func_key = create_func_key(self.storage.prefix, callset_id)
        self.storage.put_func(func_key, func_module_str)
        host_job_meta['func_upload_time'] = time.time() - func_upload_time
        host_job_meta['func_upload_timestamp'] = time.time()

        def invoke(data_str,
                   callset_id,
                   call_id,
                   func_key,
                   host_job_meta,
                   agg_data_key=None,
                   data_byte_range=None):
            data_key, output_key, status_key \
                = storage_utils.create_keys(self.storage.prefix, callset_id, call_id)

            host_job_meta['job_invoke_timestamp'] = time.time()
            if agg_data_key is None:
                data_upload_time = time.time()
                self.put_data(data_key, data_str, callset_id, call_id)
                data_upload_time = time.time() - data_upload_time
                host_job_meta['data_upload_time'] = data_upload_time
                host_job_meta['data_upload_timestamp'] = time.time()

            else:
                data_key = agg_data_key

            return self.invoke_with_keys(
                func_key,
                data_key,
                output_key,
                status_key,
                callset_id,
                call_id,
                extra_env,
                extra_meta,
                data_byte_range,
                use_cached_runtime,
                host_job_meta.copy(),
                self.job_max_runtime,
                overwrite_invoke_args=overwrite_invoke_args)

        N = len(data)
        call_result_objs = []
        for i in range(N):
            call_id = "{:05d}".format(i)

            data_byte_range = None
            if agg_data_key is not None:
                data_byte_range = agg_data_ranges[i]

            cb = pool.apply_async(
                invoke, (data_strs[i], callset_id, call_id, func_key,
                         host_job_meta.copy(), agg_data_key, data_byte_range))

            logger.info("map {} {} apply async".format(callset_id, call_id))

            call_result_objs.append(cb)

        res = [c.get() for c in call_result_objs]
        pool.close()
        pool.join()
        logger.info("map invoked {} {} pool join".format(callset_id, call_id))

        # FIXME take advantage of the callset to return a lot of these

        # note these are just the invocation futures

        return res
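
Stripped of the serialization and storage details, Exemple #38 is a fan-out/fan-in over
ThreadPool.apply_async: one AsyncResult per call, then a blocking .get() on each. A
reduced sketch of that skeleton, with invoke_one standing in for the per-call
upload-and-invoke step:

from multiprocessing.pool import ThreadPool


def invoke_one(call_id, payload):
    # placeholder for the per-call work (upload data, trigger the function, ...)
    return call_id, len(payload)


def fan_out(payloads, invoke_pool_threads=64):
    pool = ThreadPool(invoke_pool_threads)
    pending = [pool.apply_async(invoke_one, ("{:05d}".format(i), p))
               for i, p in enumerate(payloads)]
    results = [r.get() for r in pending]  # .get() re-raises any worker exception
    pool.close()
    pool.join()
    return results
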
Exemple #39
0
def create_themeball(report, progress=None, abort=None):
    pool = ThreadPool(processes=cpu_count())
    buf = BytesIO()
    num = count()
    error_occurred = Event()

    def optimize(name):
        if abort is not None and abort.is_set():
            return
        if error_occurred.is_set():
            return
        try:
            i = next(num)
            if progress is not None:
                progress(i, _('Optimizing %s') % name)
            srcpath = os.path.join(report.path, name)
            ext = srcpath.rpartition('.')[-1].lower()
            if ext == 'png':
                optimize_png(srcpath)
            elif ext in ('jpg', 'jpeg'):
                optimize_jpeg(srcpath)
        except Exception:
            return sys.exc_info()

    errors = tuple(
        filter(None, pool.map(optimize, tuple(report.name_map.iterkeys()))))
    pool.close(), pool.join()
    if abort is not None and abort.is_set():
        return
    if errors:
        e = errors[0]
        reraise(*e)

    if progress is not None:
        progress(next(num), _('Creating theme file'))
    with ZipFile(buf, 'w') as zf:
        for name in report.name_map:
            srcpath = os.path.join(report.path, name)
            with lopen(srcpath, 'rb') as f:
                zf.writestr(name, f.read(), compression=ZIP_STORED)
    buf.seek(0)
    out = BytesIO()
    if abort is not None and abort.is_set():
        return None, None
    if progress is not None:
        progress(next(num), _('Compressing theme file'))
    compress(buf, out, level=9)
    buf = BytesIO()
    prefix = report.name
    if abort is not None and abort.is_set():
        return None, None
    with ZipFile(buf, 'w') as zf:
        with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f:
            zf.writestr(prefix + '/' + THEME_METADATA, f.read())
        zf.writestr(prefix + '/' + THEME_COVER, create_cover(report))
        zf.writestr(prefix + '/' + 'icons.zip.xz',
                    out.getvalue(),
                    compression=ZIP_STORED)
    if progress is not None:
        progress(next(num), _('Finished'))
    return buf.getvalue(), prefix
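
Exemple #39 funnels worker failures back to the caller by returning sys.exc_info() from
the mapped function, filtering out the None results, and re-raising the first error. A
minimal standalone version of that pattern, using six.reraise (which examples earlier in
this listing already depend on) in place of calibre's own reraise helper:

import sys
from multiprocessing.pool import ThreadPool

from six import reraise


def work(n):
    try:
        if n == 3:
            raise ValueError("bad input: %d" % n)
    except Exception:
        return sys.exc_info()   # swallow here, re-raise on the main thread


pool = ThreadPool(4)
errors = tuple(filter(None, pool.map(work, range(6))))
pool.close()
pool.join()
if errors:
    reraise(*errors[0])         # surfaces the first worker failure with its traceback
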
Exemple #40
0
def main():
  """
  Main entry point for the test suite.
  """
  t0 = time.time()
  num_cpus = multiprocessing.cpu_count()
  mem_total = psutil.virtual_memory().total / (1024 * 1024)

  # configure the CLI
  parser = argparse.ArgumentParser()
  parser.add_argument("--exhaustive", help="check all configurations on all examples", action="store_true")
  parser.add_argument("--all-configs", help="check all configurations per example", action="store_true")
  parser.add_argument("--all-examples", help="check all examples", action="store_true")
  parser.add_argument("--folder", action="store", default="**", type=str,
                      help="sets the regressions folder to run")
  parser.add_argument("--threads", action="store", dest="n_threads", default=num_cpus, type=int,
                      help="execute regressions using the selected number of threads in parallel")
  parser.add_argument("--log", action="store", dest="log_level", default="DEBUG", type=str,
                      help="sets the logging level (DEBUG, INFO, WARNING)")
  parser.add_argument("--output-log", action="store", dest="log_path", type=str,
                      help="sets the output log path. (std out by default)")
  args = parser.parse_args()

  if args.exhaustive:
    args.all_examples = True
    args.all_configs = True

  # configure the logging
  log_format = ''
  log_level = logging.DEBUG

  # add more log levels later (if needed)
  if args.log_level.upper() == "INFO":
    log_level = logging.INFO
  elif args.log_level.upper() == "WARNING":
    log_level = logging.WARNING

  # if the user supplied a log path, write the logs to that file.
  # otherwise, write the logs to std out.
  if args.log_path:
    logging.basicConfig(filename=args.log_path, format=log_format, level=log_level)
  else:
    logging.basicConfig(format=log_format, level=log_level)

  logging.debug("Creating Pool with '%d' Workers" % args.n_threads)
  p = ThreadPool(processes=args.n_threads)

  try:
    # start the tests
    logging.info("Running regression tests...")

    # start processing the tests.
    results = []
    for test in sorted(glob.glob("./" + args.folder + "/*.c")):
      # get the meta data for this test
      meta = metadata(test)

      if meta['memory-limit'] > mem_total:
        continue

      if meta['skip'] == True:
        continue

      if meta['skip'] != False and not args.all_examples:
        continue

      # build up the subprocess command
      cmd = ['smack', test]
      cmd += ['--time-limit', str(meta['time-limit'])]
      cmd += meta['flags']

      for memory in meta['memory'][:100 if args.all_configs else 1]:
        cmd += ['--mem-mod=' + memory]

        for verifier in meta['verifiers'][:100 if args.all_configs else 1]:
          name = path.splitext(path.basename(test))[0]
          cmd += ['--verifier=' + verifier]
          cmd += ['-bc', "%s-%s-%s.bc" % (name, memory, verifier)]
          cmd += ['-bpl', "%s-%s-%s.bpl" % (name, memory, verifier)]
          r = p.apply_async(process_test,
                args=(cmd[:], test, memory, verifier, meta['expect'], meta['checkbpl'], meta['checkout'], args.log_path,),
                callback=tally_result)
          results.append(r)

    # keep the main thread active while there are active workers
    for r in results:
      r.wait()

  except KeyboardInterrupt:
    logging.debug("Caught KeyboardInterrupt, terminating workers")
    p.terminate() # terminate any remaining workers
    p.join()
  else:
    logging.debug("Quitting normally")
    # close the pool. this prevents any more tasks from being submitted.
    p.close()
    p.join() # wait for all workers to finish their tasks

  # log the elapsed time
  elapsed_time = time.time() - t0
  logging.info(' ELAPSED TIME [%.2fs]' % round(elapsed_time, 2))

  # log the test results
  logging.info(' PASSED count: %d' % passed)
  logging.info(' FAILED count: %d' % failed)
  logging.info(' TIMEOUT count: %d' % timeouts)
  logging.info(' UNKNOWN count: %d' % unknowns)

  # if there are any failed tests or tests that timed out, set the system
  # exit code to a failure status
  if timeouts > 0 or failed > 0 or unknowns > 0:
    sys.exit(1)
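
Exemple #40 schedules every regression with apply_async plus a callback (tally_result)
and then keeps the main thread in a wait loop so a KeyboardInterrupt can still
terminate() the pool. The scheduling skeleton, with process_test and tally_result
reduced to placeholders, looks roughly like this:

from multiprocessing.pool import ThreadPool

outcomes = []


def process_test(cmd):
    # placeholder for running one regression and classifying its result
    return "passed"


def tally_result(outcome):
    # callbacks run on a pool helper thread as each task completes
    outcomes.append(outcome)


pool = ThreadPool(processes=4)
try:
    pending = [pool.apply_async(process_test, args=(cmd,), callback=tally_result)
               for cmd in ["test-%d" % i for i in range(10)]]
    for r in pending:
        r.wait()        # keep the main thread alive (and interruptible) while workers run
except KeyboardInterrupt:
    pool.terminate()    # drop any remaining work
    pool.join()
else:
    pool.close()        # no more submissions; wait for everything to finish
    pool.join()
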
Exemple #41
0
    def refresh(self, datasource_names, merge_flag, refreshAll):
        """
        Fetches metadata for the specified datasources and
        merges it into the Superset database
        """
        session = db.session
        ds_list = (
            session.query(DruidDatasource)
            .filter(or_(DruidDatasource.datasource_name == name
                    for name in datasource_names))
        )

        ds_map = {ds.name: ds for ds in ds_list}
        for ds_name in datasource_names:
            datasource = ds_map.get(ds_name, None)
            if not datasource:
                datasource = DruidDatasource(datasource_name=ds_name)
                with session.no_autoflush:
                    session.add(datasource)
                flasher(
                    'Adding new datasource [{}]'.format(ds_name), 'success')
                ds_map[ds_name] = datasource
            elif refreshAll:
                flasher(
                    'Refreshing datasource [{}]'.format(ds_name), 'info')
            else:
                del ds_map[ds_name]
                continue
            datasource.cluster = self
            datasource.merge_flag = merge_flag
        session.flush()

        # Prepare multithreaded execution
        pool = ThreadPool()
        ds_refresh = list(ds_map.values())
        metadata = pool.map(_fetch_metadata_for, ds_refresh)
        pool.close()
        pool.join()

        for i in range(0, len(ds_refresh)):
            datasource = ds_refresh[i]
            cols = metadata[i]
            if cols:
                col_objs_list = (
                    session.query(DruidColumn)
                    .filter(DruidColumn.datasource_id == datasource.id)
                    .filter(or_(DruidColumn.column_name == col for col in cols))
                )
                col_objs = {col.column_name: col for col in col_objs_list}
                for col in cols:
                    if col == '__time':  # skip the time column
                        continue
                    col_obj = col_objs.get(col, None)
                    if not col_obj:
                        col_obj = DruidColumn(
                            datasource_id=datasource.id,
                            column_name=col)
                        with session.no_autoflush:
                            session.add(col_obj)
                    datatype = cols[col]['type']
                    if datatype == 'STRING':
                        col_obj.groupby = True
                        col_obj.filterable = True
                    if datatype == 'hyperUnique' or datatype == 'thetaSketch':
                        col_obj.count_distinct = True
                    # Allow sum/min/max for long or double
                    if datatype == 'LONG' or datatype == 'DOUBLE':
                        col_obj.sum = True
                        col_obj.min = True
                        col_obj.max = True
                    col_obj.type = datatype
                    col_obj.datasource = datasource
                datasource.generate_metrics_for(col_objs_list)
        session.commit()
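
The refresh above leans on the fact that ThreadPool.map returns its results in input
order, which is what makes the metadata[i] / ds_refresh[i] pairing valid. A reduced
sketch of that pairing, with a stand-in for the real _fetch_metadata_for (which talks to
the Druid cluster):

from multiprocessing.pool import ThreadPool


def _fetch_metadata_for(name):
    # stand-in: the real function queries Druid for the datasource's column metadata
    return {"columns": ["__time", "country_%s" % name]}


datasource_names = ["wikiticker", "clickstream"]
pool = ThreadPool()
metadata = pool.map(_fetch_metadata_for, datasource_names)  # same order as the input
pool.close()
pool.join()
for name, cols in zip(datasource_names, metadata):
    print(name, cols["columns"])
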
Exemple #42
0
    def validate(self, hostname, connection, ignore_no_ocsp=False):
        u"""
        Validates the certificate is not revoked using OCSP
        """
        global OCSP_VALIDATION_CACHE_UPDATED
        logger.debug(u'validating certificate: %s', hostname)
        if ignore_no_ocsp:
            logger.debug(u'validation was skipped.')
            return True

        if hostname in KNOWN_HOSTNAMES:  # skip OCSP validation if known
            logger.debug(
                'validation was skipped, because hostname %s is known',
                hostname)
            return True

        cert_data = _extract_certificate_chain(connection)

        pool = ThreadPool(len(cert_data))
        results = []
        try:
            for issuer_and_subject in cert_data:
                ocsp_uri = issuer_and_subject['subject'][
                    'ocsp_uri']  # issuer's ocsp uri
                ocsp_subject = issuer_and_subject['subject']
                ocsp_issuer = issuer_and_subject['issuer']
                logger.debug('ocsp_uri: %s', ocsp_uri)
                if ocsp_uri:
                    r = pool.apply_async(
                        self.validate_by_direct_connection_simple,
                        [ocsp_uri, ocsp_issuer, ocsp_subject])
                    results.append(r)
                else:
                    raise OperationalError(
                        msg=(u'NO OCSP URI was found: '
                             u'hostname={0}, subject={1}').format(
                                 hostname, ocsp_subject),
                        errno=ER_FAILED_TO_GET_OCSP_URI,
                    )
        finally:
            pool.close()
            pool.join()
            for r in results:
                if not r.successful():
                    raise OperationalError(
                        msg=(u'Failed to validate the certificate '
                             u'revocation status: '
                             u'hostname={0}, err={1}').format(
                                 hostname, r.get()))
            with OCSP_VALIDATION_CACHE_LOCK:
                if OCSP_VALIDATION_CACHE_UPDATED:
                    update_ocsp_response_cache_file(
                        self._ocsp_response_cache_uri)
                OCSP_VALIDATION_CACHE_UPDATED = False

            if len(results) != len(cert_data):
                raise OperationalError(
                    msg=u"Failed to validate the certificate "
                    u"revocation status. The number of validation "
                    u"didn't match: hostname={0}, retsults={1}, "
                    u"cert_data={2}".format(hostname, len(results),
                                            len(cert_data)),
                    errno=ER_INVALID_OCSP_RESPONSE)
        logger.debug(u'ok')
        # any failure must be an exception
        return True
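
The validator above only inspects AsyncResult.successful() after close()/join(), since
successful() is only meaningful once a result is ready. A compact version of that
check-after-join pattern:

from multiprocessing.pool import ThreadPool


def check(n):
    if n % 2:
        raise RuntimeError("check failed for %d" % n)
    return n


pool = ThreadPool(4)
results = [pool.apply_async(check, (n,)) for n in range(6)]
pool.close()
pool.join()                      # after join() every AsyncResult is ready
failures = [r for r in results if not r.successful()]
if failures:
    print("%d checks failed" % len(failures))   # r.get() would re-raise the worker error
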
Exemple #43
0
class ErrBot(Backend, StoreMixin):
    """ ErrBot is the layer taking care of commands management and dispatching.
    """
    __errdoc__ = """ Commands related to the bot administration """
    MSG_ERROR_OCCURRED = 'Computer says nooo. See logs for details'
    MSG_UNKNOWN_COMMAND = 'Unknown command: "%(command)s". '
    startup_time = datetime.now()

    def __init__(self, bot_config):
        log.debug("ErrBot init.")
        super().__init__(bot_config)
        self.bot_config = bot_config
        self.prefix = bot_config.BOT_PREFIX
        if bot_config.BOT_ASYNC:
            self.thread_pool = ThreadPool(bot_config.BOT_ASYNC_POOLSIZE)
            log.debug('created a thread pool of size %d.',
                      bot_config.BOT_ASYNC_POOLSIZE)
        self.commands = {
        }  # the dynamically populated list of commands available on the bot
        self.re_commands = {
        }  # the dynamically populated list of regex-based commands available on the bot
        self.command_filters = []  # the dynamically populated list of filters
        self.MSG_UNKNOWN_COMMAND = 'Unknown command: "%(command)s". ' \
                                   'Type "' + bot_config.BOT_PREFIX + 'help" for available commands.'
        if bot_config.BOT_ALT_PREFIX_CASEINSENSITIVE:
            self.bot_alt_prefixes = tuple(
                prefix.lower() for prefix in bot_config.BOT_ALT_PREFIXES)
        else:
            self.bot_alt_prefixes = bot_config.BOT_ALT_PREFIXES
        self.repo_manager = None
        self.plugin_manager = None
        self.storage_plugin = None
        self._plugin_errors_during_startup = None
        self.flow_executor = FlowExecutor(self)
        self._gbl = RLock()  # this protects internal structures of this class

    def attach_repo_manager(self, repo_manager):
        self.repo_manager = repo_manager

    def attach_plugin_manager(self, plugin_manager):
        self.plugin_manager = plugin_manager

    def attach_storage_plugin(self, storage_plugin):
        # the storage_plugin is needed by the plugins
        self.storage_plugin = storage_plugin

    def initialize_backend_storage(self):
        """
        Initialize storage for the backend to use.
        """
        log.debug("Initializing backend storage")
        assert self.plugin_manager is not None
        assert self.storage_plugin is not None
        self.open_storage(self.storage_plugin, f'{self.mode}_backend')

    @property
    def all_commands(self):
        """Return both commands and re_commands together."""
        with self._gbl:
            newd = dict(**self.commands)
            newd.update(self.re_commands)
        return newd

    def _dispatch_to_plugins(self, method, *args, **kwargs):
        """
        Dispatch the given method to all active plugins.

        Will catch and log any exceptions that occur.

        :param method: The name of the function to dispatch.
        :param *args: Passed to the callback function.
        :param **kwargs: Passed to the callback function.
        """
        for plugin in self.plugin_manager.get_all_active_plugins():
            plugin_name = plugin.name
            log.debug('Triggering %s on %s.', method, plugin_name)
            # noinspection PyBroadException
            try:
                getattr(plugin, method)(*args, **kwargs)
            except Exception:
                log.exception('%s on %s crashed.', method, plugin_name)

    def send(self,
             identifier,
             text,
             in_reply_to=None,
             groupchat_nick_reply=False):
        """ Sends a simple message to the specified user.

            :param identifier:
                an identifier from build_identifier or from an incoming message
            :param in_reply_to:
                the original message the bot is answering from
            :param text:
                the markdown text you want to send
            :param groupchat_nick_reply:
                authorize prefixing the reply with the nick of the user
        """
        # protect a little bit the backends here
        if not isinstance(identifier, Identifier):
            raise ValueError("identifier should be an Identifier")

        msg = self.build_message(text)
        msg.to = identifier
        msg.frm = in_reply_to.to if in_reply_to else self.bot_identifier
        msg.parent = in_reply_to

        nick_reply = self.bot_config.GROUPCHAT_NICK_PREFIXED
        if isinstance(identifier,
                      Room) and in_reply_to and (nick_reply
                                                 or groupchat_nick_reply):
            self.prefix_groupchat_reply(msg, in_reply_to.frm)

        self.split_and_send_message(msg)

    def send_templated(self,
                       identifier,
                       template_name,
                       template_parameters,
                       in_reply_to=None,
                       groupchat_nick_reply=False):
        """ Sends a simple message to the specified user using a template.

            :param template_parameters: the parameters for the template.
            :param template_name: the template name you want to use.
            :param identifier:
                an identifier from build_identifier or from an incoming message, a room etc.
            :param in_reply_to:
                the original message the bot is answering from
            :param groupchat_nick_reply:
                authorize prefixing the reply with the nick of the user
        """
        text = self.process_template(template_name, template_parameters)
        return self.send(identifier, text, in_reply_to, groupchat_nick_reply)

    def split_and_send_message(self, msg):
        for part in split_string_after(msg.body,
                                       self.bot_config.MESSAGE_SIZE_LIMIT):
            partial_message = msg.clone()
            partial_message.body = part
            partial_message.partial = True
            self.send_message(partial_message)

    def send_message(self, msg):
        """
        This needs to be overridden by the backends with a super() call.

        :param msg: the message to send.
        :return: None
        """
        for bot in self.plugin_manager.get_all_active_plugins():
            # noinspection PyBroadException
            try:
                bot.callback_botmessage(msg)
            except Exception:
                log.exception("Crash in a callback_botmessage handler")

    def send_card(self, card):
        """
        Sends a card, this can be overridden by the backends *without* a super() call.

        :param card: the card to send.
        :return: None
        """
        self.send_templated(card.to, 'card', {'card': card})

    def send_simple_reply(self, msg, text, private=False, threaded=False):
        """Send a simple response to a given incoming message

        :param private: if True will force a response in private.
        :param threaded: if True and if the backend supports it, sends the response in a threaded message.
        :param text: the markdown text of the message.
        :param msg: the message you are replying to.
        """
        reply = self.build_reply(msg, text, private=private, threaded=threaded)
        if isinstance(reply.to,
                      Room) and self.bot_config.GROUPCHAT_NICK_PREFIXED:
            self.prefix_groupchat_reply(reply, msg.frm)
        self.split_and_send_message(reply)

    def process_message(self, msg):
        """Check if the given message is a command for the bot and act on it.
        It returns True to trigger the callback_message handlers on the plugins.

        :param msg: the incoming message.
        """
        # Prepare to handle either private chats or group chats

        frm = msg.frm
        text = msg.body
        if not hasattr(msg.frm, 'person'):
            raise Exception(
                f'msg.frm not an Identifier as it misses the "person" property.'
                f' Class of frm : {msg.frm.__class__}.')

        username = msg.frm.person
        user_cmd_history = self.cmd_history[username]

        if msg.delayed:
            log.debug('Message from history, ignore it.')
            return False

        if self.is_from_self(msg):
            log.debug("Ignoring message from self.")
            return False

        log.debug('*** frm = %s', frm)
        log.debug('*** username = %s', username)
        log.debug('*** text = %s', text)

        suppress_cmd_not_found = self.bot_config.SUPPRESS_CMD_NOT_FOUND

        prefixed = False  # Keeps track of whether text was prefixed with a bot prefix
        only_check_re_command = False  # Becomes true if text is determined to not be a regular command
        tomatch = text.lower(
        ) if self.bot_config.BOT_ALT_PREFIX_CASEINSENSITIVE else text
        if len(self.bot_config.BOT_ALT_PREFIXES) > 0 and tomatch.startswith(
                self.bot_alt_prefixes):
            # Yay! We were called by one of our alternate prefixes. Now we just have to find out
            # which one... (And find the longest matching, in case you have 'err' and 'errbot' and
            # someone uses 'errbot', which also matches 'err' but would leave 'bot' to be taken as
            # part of the called command in that case)
            prefixed = True
            longest = 0
            for prefix in self.bot_alt_prefixes:
                length = len(prefix)
                if tomatch.startswith(prefix) and length > longest:
                    longest = length
            log.debug('Called with alternate prefix "%s"', text[:longest])
            text = text[longest:]

            # Now also remove the separator from the text
            for sep in self.bot_config.BOT_ALT_PREFIX_SEPARATORS:
                # While unlikely, one may have separators consisting of
                # more than one character
                length = len(sep)
                if text[:length] == sep:
                    text = text[length:]
        elif msg.is_direct and self.bot_config.BOT_PREFIX_OPTIONAL_ON_CHAT:
            log.debug(
                'Assuming "%s" to be a command because BOT_PREFIX_OPTIONAL_ON_CHAT is True',
                text)
            # In order to keep noise down we suppress messages about the command
            # not being found, because it's possible a plugin will trigger on what
            # was said with trigger_message.
            suppress_cmd_not_found = True
        elif not text.startswith(self.bot_config.BOT_PREFIX):
            only_check_re_command = True
        if text.startswith(self.bot_config.BOT_PREFIX):
            text = text[len(self.bot_config.BOT_PREFIX):]
            prefixed = True

        text = text.strip()
        text_split = text.split(' ')
        cmd = None
        command = None
        args = ''
        if not only_check_re_command:
            i = len(text_split)
            while cmd is None:
                command = '_'.join(text_split[:i])

                with self._gbl:
                    if command in self.commands:
                        cmd = command
                        args = ' '.join(text_split[i:])
                    else:
                        i -= 1
                if i == 0:
                    break

            if command == self.bot_config.BOT_PREFIX:  # we did "!!" so recall the last command
                if len(user_cmd_history):
                    cmd, args = user_cmd_history[-1]
                else:
                    return False  # no command in history
            elif command.isdigit(
            ):  # we did "!#" so we recall the specified command
                index = int(command)
                if len(user_cmd_history) >= index:
                    cmd, args = user_cmd_history[-index]
                else:
                    return False  # no command in history

        # Try to match one of the regex commands if the regular commands produced no match
        matched_on_re_command = False
        if not cmd:
            with self._gbl:
                if prefixed or (msg.is_direct and
                                self.bot_config.BOT_PREFIX_OPTIONAL_ON_CHAT):
                    commands = dict(self.re_commands)
                else:
                    commands = {
                        k: self.re_commands[k]
                        for k in self.re_commands
                        if not self.re_commands[k]._err_command_prefix_required
                    }

            for name, func in commands.items():
                if func._err_command_matchall:
                    match = list(func._err_command_re_pattern.finditer(text))
                else:
                    match = func._err_command_re_pattern.search(text)
                if match:
                    log.debug('Matching "%s" against "%s" produced a match.',
                              text, func._err_command_re_pattern.pattern)
                    matched_on_re_command = True
                    self._process_command(msg, name, text, match)
                else:
                    log.debug('Matching "%s" against "%s" produced no match.',
                              text, func._err_command_re_pattern.pattern)
        if matched_on_re_command:
            return True

        if cmd:
            self._process_command(msg, cmd, args, match=None)
        elif not only_check_re_command:
            log.debug("Command not found")
            for cmd_filter in self.command_filters:
                if getattr(cmd_filter, 'catch_unprocessed', False):
                    try:
                        reply = cmd_filter(msg,
                                           cmd,
                                           args,
                                           False,
                                           emptycmd=True)
                        if reply:
                            self.send_simple_reply(msg, reply)
                        # continue processing the other unprocessed cmd filters.
                    except Exception:
                        log.exception("Exception in a command filter command.")
        return True

    def _process_command_filters(self, msg, cmd, args, dry_run=False):
        try:
            for cmd_filter in self.command_filters:
                msg, cmd, args = cmd_filter(msg, cmd, args, dry_run)
                if msg is None:
                    return None, None, None
            return msg, cmd, args
        except Exception:
            log.exception(
                "Exception in a filter command, blocking the command in doubt")
            return None, None, None

    def _process_command(self, msg, cmd, args, match):
        """Process and execute a bot command"""

        # first it must go through the command filters
        msg, cmd, args = self._process_command_filters(msg, cmd, args, False)
        if msg is None:
            log.info('Command %s blocked or deferred.', cmd)
            return

        frm = msg.frm
        username = frm.person
        user_cmd_history = self.cmd_history[username]

        log.info(
            f'Processing command "{cmd}" with parameters "{args}" from {frm}')

        if (cmd, args) in user_cmd_history:
            user_cmd_history.remove(
                (cmd, args))  # Avoids duplicate history items

        with self._gbl:
            f = self.re_commands[cmd] if match else self.commands[cmd]

        if f._err_command_admin_only and self.bot_config.BOT_ASYNC:
            # If it is an admin command, wait until the queue is completely depleted so
            # we don't have strange concurrency issues on load/unload/updates etc...
            self.thread_pool.close()
            self.thread_pool.join()
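            # a closed pool cannot accept new submissions, so a fresh pool replaces it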
            self.thread_pool = ThreadPool(self.bot_config.BOT_ASYNC_POOLSIZE)

        if f._err_command_historize:
            user_cmd_history.append(
                (cmd, args
                 ))  # add it to the history only if it is authorized to be so

        # Don't check for None here as None can be a valid argument to str.split.
        # '' was chosen as default argument because this isn't a valid argument to str.split()
        if not match and f._err_command_split_args_with != '':
            try:
                if hasattr(f._err_command_split_args_with, "parse_args"):
                    args = f._err_command_split_args_with.parse_args(args)
                elif callable(f._err_command_split_args_with):
                    args = f._err_command_split_args_with(args)
                else:
                    args = args.split(f._err_command_split_args_with)
            except Exception as e:
                self.send_simple_reply(
                    msg, f"Sorry, I couldn't parse your arguments. {e}")
                return

        if self.bot_config.BOT_ASYNC:
            result = self.thread_pool.apply_async(
                self._execute_and_send, [], {
                    'cmd': cmd,
                    'args': args,
                    'match': match,
                    'msg': msg,
                    'template_name': f._err_command_template
                })
            if f._err_command_admin_only:
                # Again, if it is an admin command, wait until the queue is completely
                # depleted so we don't have strange concurrency issues.
                result.wait()
        else:
            self._execute_and_send(cmd=cmd,
                                   args=args,
                                   match=match,
                                   msg=msg,
                                   template_name=f._err_command_template)

    @staticmethod
    def process_template(template_name, template_parameters):
        # integrated templating
        # The template needs to be set and the answer from the user command needs to be a mapping
        # If not just convert the answer to string.
        if template_name and isinstance(template_parameters,
                                        collections.Mapping):
            return tenv().get_template(template_name +
                                       '.md').render(**template_parameters)

        # Reply should be all text at this point (See https://github.com/errbotio/errbot/issues/96)
        return str(template_parameters)

    def _execute_and_send(self, cmd, args, match, msg, template_name=None):
        """Execute a bot command and send output back to the caller

        :param cmd: The command that was given to the bot (after being expanded)
        :param args: Arguments given along with cmd
        :param match: A re.MatchObject if command is coming from a regex-based command, else None
        :param msg: The message object
        :param template_name: The name of the jinja template which should be used to render
            the markdown output, if any

        """
        private = cmd in self.bot_config.DIVERT_TO_PRIVATE
        threaded = cmd in self.bot_config.DIVERT_TO_THREAD
        commands = self.re_commands if match else self.commands
        try:
            with self._gbl:
                method = commands[cmd]
            # first check if we need to reattach a flow context
            flow, _ = self.flow_executor.check_inflight_flow_triggered(
                cmd, msg.frm)
            if flow:
                log.debug("Reattach context from flow %s to the message",
                          flow._root.name)
                msg.ctx = flow.ctx
            elif method._err_command_flow_only:
                # check if it is a flow_only command but we are not in a flow.
                log.debug(
                    "%s is tagged flow_only and we are not in a flow. Ignores the command.",
                    cmd)
                return

            if inspect.isgeneratorfunction(method):
                replies = method(msg, match) if match else method(msg, args)
                for reply in replies:
                    if reply:
                        self.send_simple_reply(
                            msg, self.process_template(template_name, reply),
                            private, threaded)
            else:
                reply = method(msg, match) if match else method(msg, args)
                if reply:
                    self.send_simple_reply(
                        msg, self.process_template(template_name, reply),
                        private, threaded)

            # The command is a success, check if this has not made a flow progressed
            self.flow_executor.trigger(cmd, msg.frm, msg.ctx)

        except CommandError as command_error:
            reason = command_error.reason
            if command_error.template:
                reason = self.process_template(command_error.template, reason)
            self.send_simple_reply(msg, reason, private, threaded)

        except Exception as e:
            tb = traceback.format_exc()
            log.exception(
                f'An error happened while processing a message ("{msg.body}"): {tb}'
            )
            self.send_simple_reply(msg, self.MSG_ERROR_OCCURRED + f':\n{e}',
                                   private, threaded)

    def unknown_command(self, _, cmd, args):
        """ Override the default unknown command behavior
        """
        full_cmd = cmd + ' ' + args.split(' ')[0] if args else None
        if full_cmd:
            msg = f'Command "{cmd}" / "{full_cmd}" not found.'
        else:
            msg = f'Command "{cmd}" not found.'
        ununderscore_keys = [m.replace('_', ' ') for m in self.commands.keys()]
        matches = difflib.get_close_matches(cmd, ununderscore_keys)
        if full_cmd:
            matches.extend(
                difflib.get_close_matches(full_cmd, ununderscore_keys))
        matches = set(matches)
        if matches:
            alternatives = ('" or "' +
                            self.bot_config.BOT_PREFIX).join(matches)
            msg += f'\n\nDid you mean "{self.bot_config.BOT_PREFIX}{alternatives}" ?'
        return msg
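
    # For reference, difflib.get_close_matches('stauts', ['status', 'restart'])
    # returns ['status']; those close matches feed the "Did you mean" hint above.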

    def inject_commands_from(self, instance_to_inject):
        with self._gbl:
            plugin_name = instance_to_inject.name
            for name, value in inspect.getmembers(instance_to_inject,
                                                  inspect.ismethod):
                if getattr(value, '_err_command', False):
                    commands = self.re_commands if getattr(
                        value, '_err_re_command') else self.commands
                    name = getattr(value, '_err_command_name')

                    if name in commands:
                        f = commands[name]
                        new_name = (plugin_name + '-' + name).lower()
                        self.warn_admins(
                            f'{plugin_name}.{name} clashes with {type(f.__self__).__name__}.{f.__name__} '
                            f'so it has been renamed {new_name}')
                        name = new_name
                        value.__func__._err_command_name = new_name  # To keep track of the renaming.
                    commands[name] = value

                    if getattr(value, '_err_re_command'):
                        log.debug('Adding regex command : %s -> %s.', name,
                                  value.__name__)
                        self.re_commands = commands
                    else:
                        log.debug('Adding command : %s -> %s.', name,
                                  value.__name__)
                        self.commands = commands

    def inject_flows_from(self, instance_to_inject):
        classname = instance_to_inject.__class__.__name__
        for name, method in inspect.getmembers(instance_to_inject,
                                               inspect.ismethod):
            if getattr(method, '_err_flow', False):
                log.debug('Found new flow %s: %s', classname, name)
                flow = FlowRoot(name, method.__doc__)
                try:
                    method(flow)
                except Exception:
                    log.exception("Exception initializing a flow")

                self.flow_executor.add_flow(flow)

    def inject_command_filters_from(self, instance_to_inject):
        with self._gbl:
            for name, method in inspect.getmembers(instance_to_inject,
                                                   inspect.ismethod):
                if getattr(method, '_err_command_filter', False):
                    log.debug('Adding command filter: %s', name)
                    self.command_filters.append(method)

    def remove_flows_from(self, instance_to_inject):
        for name, value in inspect.getmembers(instance_to_inject,
                                              inspect.ismethod):
            if getattr(value, '_err_flow', False):
                log.debug('Remove flow %s', name)
                # TODO(gbin)

    def remove_commands_from(self, instance_to_inject):
        with self._gbl:
            for name, value in inspect.getmembers(instance_to_inject,
                                                  inspect.ismethod):
                if getattr(value, '_err_command', False):
                    name = getattr(value, '_err_command_name')
                    if getattr(value,
                               '_err_re_command') and name in self.re_commands:
                        del self.re_commands[name]
                    elif not getattr(
                            value,
                            '_err_re_command') and name in self.commands:
                        del self.commands[name]

    def remove_command_filters_from(self, instance_to_inject):
        with self._gbl:
            for name, method in inspect.getmembers(instance_to_inject,
                                                   inspect.ismethod):
                if getattr(method, '_err_command_filter', False):
                    log.debug('Removing command filter: %s', name)
                    self.command_filters.remove(method)

    def _admins_to_notify(self):
        """
        Creates a list of administrators to notify
        """
        admins_to_notify = self.bot_config.BOT_ADMINS_NOTIFICATIONS
        return admins_to_notify

    def warn_admins(self, warning: str) -> None:
        """
        Send a warning to the administrators of the bot.

        :param warning: The markdown-formatted text of the message to send.
        """
        for admin in self._admins_to_notify():
            self.send(self.build_identifier(admin), warning)
        log.warning(warning)

    def callback_message(self, msg):
        """Processes for commands and dispatches the message to all the plugins."""
        if self.process_message(msg):
            # Act only if the backend tells us that this message is OK to broadcast.
            self._dispatch_to_plugins('callback_message', msg)

    def callback_mention(self, msg, people):
        log.debug("%s has/have been mentioned",
                  ', '.join(str(p) for p in people))
        self._dispatch_to_plugins('callback_mention', msg, people)

    def callback_presence(self, pres):
        self._dispatch_to_plugins('callback_presence', pres)

    def callback_room_joined(self, room):
        """
            Triggered when the bot has joined a MUC.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room that was joined.
        """
        self._dispatch_to_plugins('callback_room_joined', room)

    def callback_room_left(self, room):
        """
            Triggered when the bot has left a MUC.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room that was left.
        """
        self._dispatch_to_plugins('callback_room_left', room)

    def callback_room_topic(self, room):
        """
            Triggered when the topic in a MUC changes.

            :param room:
                An instance of :class:`~errbot.backends.base.MUCRoom`
                representing the room for which the topic changed.
        """
        self._dispatch_to_plugins('callback_room_topic', room)

    def callback_stream(self, stream):
        log.info('Initiated an incoming transfer %s.', stream)
        Tee(stream, self.plugin_manager.get_all_active_plugins()).start()

    def signal_connect_to_all_plugins(self):
        for bot in self.plugin_manager.get_all_active_plugins():
            if hasattr(bot, 'callback_connect'):
                # noinspection PyBroadException
                try:
                    log.debug('Trigger callback_connect on %s.',
                              bot.__class__.__name__)
                    bot.callback_connect()
                except Exception:
                    log.exception(f'callback_connect failed for {bot}.')

    def connect_callback(self):
        log.info('Activate internal commands')
        if self._plugin_errors_during_startup:
            errors = f'Some plugins failed to start during bot startup:\n\n{self._plugin_errors_during_startup}'
        else:
            errors = ''
        errors += self.plugin_manager.activate_non_started_plugins()
        if errors:
            self.warn_admins(errors)
            log.info(errors)
        log.info('Notifying connection to all the plugins...')
        self.signal_connect_to_all_plugins()
        log.info('Plugin activation done.')

    def disconnect_callback(self):
        log.info('Disconnect callback, deactivating all the plugins.')
        self.plugin_manager.deactivate_all_plugins()

    def get_doc(self, command):
        """Get command documentation
        """
        if not command.__doc__:
            return '(undocumented)'
        if self.prefix == '!':
            return command.__doc__
        ununderscore_keys = (m.replace('_', ' ')
                             for m in self.all_commands.keys())
        pat = re.compile(fr'!({"|".join(ununderscore_keys)})')
        return re.sub(pat, self.prefix + r'\1', command.__doc__)

    @staticmethod
    def get_plugin_class_from_method(meth):
        for cls in inspect.getmro(type(meth.__self__)):
            if meth.__name__ in cls.__dict__:
                return cls
        return None

    def get_command_classes(self):
        return (self.get_plugin_class_from_method(command)
                for command in self.all_commands.values())

    def shutdown(self):
        self.close_storage()
        self.plugin_manager.shutdown()
        self.repo_manager.shutdown()

    def prefix_groupchat_reply(self, message: Message, identifier: Identifier):
        if message.body.startswith('#'):
            # Markdown heading, insert an extra newline to ensure the
            # markdown rendering doesn't break.
            message.body = "\n" + message.body
Exemple #44
0
    # print(r.status_code)  # print the HTTP status code
    t = time.time()
    if r.status_code == 200:
        aliyunoss.upload_file(path, r.content)
        print(path)
        # Alternative: write the content to a local image file instead
        # with open(path, 'wb') as f:
        #     f.write(r.content)
    # print(time.time() - t)
    return


if __name__ == '__main__':

    with open('dongtaitu.json', 'r') as f:
        j = json.load(f)

    p = ThreadPool(200)
    for i in j:
        #print(i, i['src'])

        p.apply_async(func=download_img,
                      args=(i['src'], 'gif/{}.gif'.format(i['title'])))
        # break
        # download_img(i['src'],'./gif/{}.gif'.format(i['title']))
        #break
    p.close()
    p.join()
    print('done')
class SnowflakeChunkDownloader(object):
    u"""
    Large Result set chunk downloader class.
    """
    def _pre_init(self,
                  chunks,
                  connection,
                  cursor,
                  qrmk,
                  chunk_headers,
                  query_result_format='JSON',
                  prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS,
                  use_ijson=False):
        self._use_ijson = use_ijson
        self._query_result_format = query_result_format

        self._downloader_error = None

        self._connection = connection
        self._cursor = cursor
        self._qrmk = qrmk
        self._chunk_headers = chunk_headers

        self._chunk_size = len(chunks)
        self._chunks = {}
        self._chunk_cond = Condition()

        self._effective_threads = min(prefetch_threads, self._chunk_size)
        if self._effective_threads < 1:
            self._effective_threads = 1

        for idx, chunk in enumerate(chunks):
            logger.debug(u"queued chunk %d: rowCount=%s", idx,
                         chunk[u'rowCount'])
            self._chunks[idx] = SnowflakeChunk(url=chunk[u'url'],
                                               result_data=None,
                                               ready=False,
                                               row_count=int(
                                                   chunk[u'rowCount']))

        logger.debug(
            u'prefetch threads: %s, '
            u'number of chunks: %s, '
            u'effective threads: %s', prefetch_threads, self._chunk_size,
            self._effective_threads)

        self._pool = ThreadPool(self._effective_threads)

        self._downloading_chunks_lock = Lock()
        self._total_millis_downloading_chunks = 0
        self._total_millis_parsing_chunks = 0

        self._next_chunk_to_consume = 0

    def __init__(self,
                 chunks,
                 connection,
                 cursor,
                 qrmk,
                 chunk_headers,
                 query_result_format='JSON',
                 prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS,
                 use_ijson=False):
        self._pre_init(chunks,
                       connection,
                       cursor,
                       qrmk,
                       chunk_headers,
                       query_result_format=query_result_format,
                       prefetch_threads=prefetch_threads,
                       use_ijson=use_ijson)
        logger.debug('Chunk Downloader in memory')
        for idx in range(self._effective_threads):
            self._pool.apply_async(self._download_chunk, [idx])
        self._next_chunk_to_download = self._effective_threads

    def _download_chunk(self, idx):
        """
        Downloads a chunk asynchronously
        """
        logger.debug(u'downloading chunk %s/%s', idx + 1, self._chunk_size)
        headers = {}
        try:
            if self._chunk_headers is not None:
                headers = self._chunk_headers
                logger.debug(u'use chunk headers from result')
            elif self._qrmk is not None:
                headers[SSE_C_ALGORITHM] = SSE_C_AES
                headers[SSE_C_KEY] = self._qrmk

            logger.debug(u"started getting the result set %s: %s", idx + 1,
                         self._chunks[idx].url)
            result_data = self._fetch_chunk(self._chunks[idx].url, headers)
            logger.debug(u"finished getting the result set %s: %s", idx + 1,
                         self._chunks[idx].url)

            if isinstance(result_data, ResultIterWithTimings):
                metrics = result_data.get_timings()
                with self._downloading_chunks_lock:
                    self._total_millis_downloading_chunks += metrics[
                        ResultIterWithTimings.DOWNLOAD]
                    self._total_millis_parsing_chunks += metrics[
                        ResultIterWithTimings.PARSE]

            with self._chunk_cond:
                self._chunks[idx] = self._chunks[idx]._replace(
                    result_data=result_data, ready=True)
                self._chunk_cond.notify_all()
                logger.debug(u'added chunk %s/%s to a chunk list.', idx + 1,
                             self._chunk_size)
        except Exception as e:
            logger.exception(
                u'Failed to fetch the large result set chunk %s/%s', idx + 1,
                self._chunk_size)
            self._downloader_error = e

    def next_chunk(self):
        """
        Gets the next chunk if ready
        """
        logger.debug(
            u'next_chunk_to_consume={next_chunk_to_consume}, '
            u'next_chunk_to_download={next_chunk_to_download}, '
            u'total_chunks={total_chunks}'.format(
                next_chunk_to_consume=self._next_chunk_to_consume + 1,
                next_chunk_to_download=self._next_chunk_to_download + 1,
                total_chunks=self._chunk_size))
        if self._next_chunk_to_consume > 0:
            # clean up the previously fetched data
            n = self._next_chunk_to_consume - 1
            self._chunks[n] = self._chunks[n]._replace(result_data=None,
                                                       ready=False)

            if self._next_chunk_to_download < self._chunk_size:
                self._pool.apply_async(self._download_chunk,
                                       [self._next_chunk_to_download])
                self._next_chunk_to_download += 1

        if self._downloader_error is not None:
            raise self._downloader_error

        for attempt in range(MAX_RETRY_DOWNLOAD):
            logger.debug(
                u'waiting for chunk %s/%s'
                u' in %s/%s download attempt', self._next_chunk_to_consume + 1,
                self._chunk_size, attempt + 1, MAX_RETRY_DOWNLOAD)
            done = False
            for wait_counter in range(MAX_WAIT):
                with self._chunk_cond:
                    if self._downloader_error:
                        raise self._downloader_error
                    if self._chunks[self._next_chunk_to_consume].ready:
                        done = True
                        break
                    logger.debug(
                        u'chunk %s/%s is NOT ready to consume'
                        u' in %s/%s(s)', self._next_chunk_to_consume + 1,
                        self._chunk_size,
                        (wait_counter + 1) * WAIT_TIME_IN_SECONDS,
                        MAX_WAIT * WAIT_TIME_IN_SECONDS)
                    self._chunk_cond.wait(WAIT_TIME_IN_SECONDS)
            else:
                logger.debug(
                    u'chunk %s/%s is still NOT ready. Restarting chunk '
                    u'downloader threads', self._next_chunk_to_consume + 1,
                    self._chunk_size)
                self._pool.terminate()  # terminate the thread pool
                self._pool = ThreadPool(self._effective_threads)
                for idx0 in range(self._effective_threads):
                    idx = idx0 + self._next_chunk_to_consume
                    self._pool.apply_async(self._download_chunk, [idx])
            if done:
                break
        else:
            Error.errorhandler_wrapper(
                self._connection, self._cursor, OperationalError, {
                    u'msg': u'The result set chunk download failed or hung '
                    u'for an unknown reason.',
                    u'errno': ER_CHUNK_DOWNLOAD_FAILED
                })
        logger.debug(u'chunk %s/%s is ready to consume',
                     self._next_chunk_to_consume + 1, self._chunk_size)

        ret = self._chunks[self._next_chunk_to_consume]
        self._next_chunk_to_consume += 1
        return ret

    def terminate(self):
        """
        Terminates downloading the chunks.
        """
        if hasattr(self, u'_pool') and self._pool is not None:
            self._pool.close()
            self._pool.join()
            self._pool = None

    def __del__(self):
        try:
            self.terminate()
        except:
            # ignore all errors in the destructor
            pass

    def _fetch_chunk(self, url, headers):
        """
        Fetch the chunk from S3.
        """
        handler = JsonBinaryHandler(is_raw_binary_iterator=True,
                                    use_ijson=self._use_ijson) \
            if self._query_result_format == 'json' else \
            ArrowBinaryHandler(self._cursor, self._connection)

        return self._connection.rest.fetch(u'get',
                                           url,
                                           headers,
                                           timeout=DEFAULT_REQUEST_TIMEOUT,
                                           is_raw_binary=True,
                                           binary_data_handler=handler)
Exemple #46
0
def datasets_evaluate(dataset_file):
    threading=True
    k=1
    kMax=10
    p_entity=0
    p_relation=0
    global correctRelations
    correctRelations=0
    global wrongRelations
    wrongRelations=0
    global correctEntities
    correctEntities=0
    global wrongEntities
    wrongEntities=0
    count=1
    startQ=0
    endQ=5000
    errors=0
    results=[]
    p_e=0
    p_r=0

    #questions=read_dataset('datasets/simplequestions.txt')
    
    
    filepath = 'datasets/'+dataset_file
    questions=read_dataset(filepath)

    
    if threading:
        pool = ThreadPool(12)
        pool.map(evaluate, questions[:50])
        pool.close()
        pool.join()
    else:
        for question in questions:
            try:
                single_result=evaluate(question)
                print(count)
                count=count+1
                print( "#####" + str((correctRelations * 100) / (correctRelations + wrongRelations)))
                print("#####" + str((correctEntities * 100) / (correctEntities + wrongEntities)))
                results.append(single_result)
                
            except:
                errors+=1
                print(errors)
                continue
     
        
    with open('results_simple_entities_FALCON.csv', mode='w', newline='', encoding='utf-8') as results_file:
        writer = csv.writer(results_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(results)    
    print("Correct Relations:",correctRelations)
    print("Relations:")
    print((correctRelations*100)/(correctRelations+wrongRelations))
    print("Correct Entities:",correctEntities)
    print("Entities:")
    print((correctEntities*100)/(correctEntities+wrongEntities))
    print(correctEntities+wrongEntities)
    # print("p_entity:")
class hxtool_scheduler:
    def __init__(self, thread_count=None):
        self._lock = threading.Lock()
        self.task_queue = {}
        self.history_queue = {}
        self.task_hx_api_sessions = {}
        self._poll_thread = threading.Thread(target=self._scan_task_queue,
                                             name="PollThread")
        self._stop_event = threading.Event()
        # Allow for thread oversubscription based on CPU count
        self.thread_count = thread_count or (cpu_count() + 1)
        self.task_threads = ThreadPool(self.thread_count)
        logger.info("Task scheduler initialized.")

    def _scan_task_queue(self):
        while not self._stop_event.wait(.1):
            ret = None
            with self._lock:
                ret = self.task_threads.imap_unordered(
                    self._run_task,
                    [_ for _ in self.task_queue.values() if _.should_run()])
            if ret:
                while not self._stop_event.is_set():
                    try:
                        ret.next(timeout=5)
                    except TimeoutError:
                        break
                    except StopIteration:
                        break
                    except Exception as e:
                        logger.error(pretty_exceptions(e))
                        continue

    def _run_task(self, task):
        ret = False
        task.set_state(TASK_STATE_QUEUED)
        logger.debug("Executing task with id: %s, name: %s.", task.task_id,
                     task.name)
        try:
            ret = task.run(self)
        except Exception as e:
            logger.error(pretty_exceptions(e))
            task.set_state(TASK_STATE_FAILED)
        finally:
            return ret

    def _add_task_api_task(self, profile_id, hx_host, hx_port, username,
                           password):
        self.task_hx_api_sessions[profile_id] = HXAPI(
            hx_host,
            hx_port=hx_port,
            proxies=hxtool_global.hxtool_config['network'].get('proxies'),
            headers=hxtool_global.hxtool_config['headers'],
            cookies=hxtool_global.hxtool_config['cookies'],
            logger_name=hxtool_logging.getLoggerName(HXAPI.__name__),
            default_encoding=default_encoding)
        api_login_task = hxtool_scheduler_task(
            profile_id, "Task API Login - {}".format(hx_host), immutable=True)
        api_login_task.add_step(hxtool_task_modules.task_api_session_module,
                                kwargs={
                                    'profile_id': profile_id,
                                    'username': username,
                                    'password': password
                                })
        self.add(api_login_task)

    def start(self):
        self._poll_thread.start()
        logger.info("Task scheduler started with %s threads.",
                    self.thread_count)

    def stop(self):
        logger.debug("stop() enter.")
        self._stop_event.set()
        logger.debug("Closing the task thread pool.")
        self.task_threads.close()
        logger.debug("Waiting for running threads to terminate.")
        self.task_threads.join()
        logger.debug("stop() exit.")

    def initialize_task_api_sessions(self):
        # Loop through background credentials and start the API sessions
        profiles = hxtool_global.hxtool_db.profileList()
        for profile in profiles:
            task_api_credential = hxtool_global.hxtool_db.backgroundProcessorCredentialGet(
                profile['profile_id'])
            if task_api_credential:
                try:
                    salt = HXAPI.b64(task_api_credential['salt'], True)
                    iv = HXAPI.b64(task_api_credential['iv'], True)
                    key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
                    decrypted_background_password = crypt_aes(
                        key,
                        iv,
                        task_api_credential['hx_api_encrypted_password'],
                        decrypt=True)
                    self._add_task_api_task(
                        profile['profile_id'], profile['hx_host'],
                        profile['hx_port'],
                        task_api_credential['hx_api_username'],
                        decrypted_background_password)
                    decrypted_background_password = None
                except UnicodeDecodeError:
                    logger.error(
                        "Please reset the background credential for {} ({}).".
                        format(profile['hx_host'], profile['profile_id']))
            else:
                logger.info("No background credential for {} ({}).".format(
                    profile['hx_host'], profile['profile_id']))

    def add_task_api_session(self, profile_id, hx_host, hx_port, username,
                             password):
        iv = crypt_generate_random(16)
        salt = crypt_generate_random(32)
        key = crypt_pbkdf2_hmacsha256(salt, TASK_API_KEY)
        encrypted_password = crypt_aes(key, iv, password)
        hxtool_global.hxtool_db.backgroundProcessorCredentialCreate(
            profile_id, username, HXAPI.b64(iv), HXAPI.b64(salt),
            encrypted_password)
        encrypted_password = None
        self._add_task_api_task(profile_id, hx_host, hx_port, username,
                                password)
        password = None

    def remove_task_api_session(self, profile_id):
        out = hxtool_global.hxtool_db.backgroundProcessorCredentialRemove(
            profile_id)
        hx_api_object = self.task_hx_api_sessions.get(profile_id)
        if hx_api_object and hx_api_object.restIsSessionValid():
            (ret, response_code, response_data) = hx_api_object.restLogout()
            del self.task_hx_api_sessions[profile_id]

    def logout_task_api_sessions(self):
        for hx_api_object in self.task_hx_api_sessions.values():
            if hx_api_object is not None:
                hx_api_object.restLogout()
                hx_api_object = None

    def signal_child_tasks(self, parent_task_id, parent_task_state,
                           parent_stored_result):
        with self._lock:
            for task_id in self.task_queue:
                self.task_queue[task_id].parent_state_callback(
                    parent_task_id, parent_task_state, parent_stored_result)

    def add(self, task, should_store=True):
        with self._lock:
            self.task_queue[task.task_id] = task
            task.set_state(TASK_STATE_SCHEDULED)
            # Note: this must be within the lock otherwise we run into a nasty race condition where the task runs before the stored state is set -
            # with the run lock taking precedence.
            if should_store:
                task.store()
        return task.task_id

    def add_list(self, tasks):
        if isinstance(tasks, list):
            for t in tasks:
                self.add(t)

    def remove(self, task_id, delete_children=True):
        if task_id:
            with self._lock:
                if delete_children:
                    # We need to make a shallow copy so we don't modify the task_queue while iterating over it
                    for child_task_id in [
                            _.task_id for _ in self.task_queue.values()
                            if _.parent_id == task_id
                    ]:
                        self.task_queue[child_task_id].remove()
                        del self.task_queue[child_task_id]

                    for child_task_id in [
                            _['task_id'] for _ in self.history_queue.values()
                            if _['parent_id'] == task_id
                    ]:
                        del self.history_queue[child_task_id]

                t = self.task_queue.get(task_id, None)
                if t and not t.immutable:
                    t.remove()
                    del self.task_queue[task_id]
                    t = None
                elif task_id in self.history_queue:
                    del self.history_queue[task_id]

    def get(self, task_id):
        with self._lock:
            return self.task_queue.get(task_id, None)

    def move_to_history(self, task_id):
        with self._lock:
            t = self.task_queue.pop(task_id, None)
            if t is not None:
                self.history_queue[task_id] = t.metadata()
        if len(self.history_queue) > MAX_HISTORY_QUEUE_LENGTH:
            self.history_queue.popitem()

    def tasks(self):
        # Shallow copy to avoid locking
        return [_.metadata() for _ in list(self.task_queue.values())] + list(
            self.history_queue.values())

    # Load queued tasks from the database
    def load_from_database(self):
        try:
            if self.status():
                tasks = hxtool_global.hxtool_db.taskList()
                for task_entry in tasks:
                    p_id = task_entry.get('parent_id', None)
                    if p_id and (not task_entry['parent_complete']
                                 and not hxtool_global.hxtool_db.taskGet(
                                     task_entry['profile_id'], p_id)):
                        logger.warn("Deleting orphan task {}, {}".format(
                            task_entry['name'], task_entry['task_id']))
                        hxtool_global.hxtool_db.taskDelete(
                            task_entry['profile_id'], task_entry['task_id'])
                    else:
                        task = hxtool_scheduler_task.deserialize(task_entry)
                        task.set_stored()
                        # Set should_store to False as we've already been stored, and we skip a needless update
                        self.add(task, should_store=False)
            else:
                logger.warn(
                    "Task scheduler must be running before loading queued tasks from the database."
                )
        except Exception as e:
            logger.error(
                "Failed to load saved tasks from the database. Error: {}".
                format(pretty_exceptions(e)))

    def status(self):
        return self._poll_thread.is_alive()
Exemple #48
0
def exec_tls(suites_file,
             target,
             tls_opts,
             serv_set,
             handshake=False,
             weight=False):
    # Step 1: Parse service list
    print('--- STARTING CIPHERSUITE SELECTION PROCESS ---')
    print(f'\nParsing ciphersuites from {suites_file}'.ljust(
        settings.strlen, '.'),
          end=' ',
          flush=True)

    total_ciphersuites = utils.parse_services(suites_file)
    n_total = len(total_ciphersuites)
    success_ciphersuites = []
    not_ciphersuites = []
    error_ciphersuites = []
    current = 1

    print(f'ok\nGot {n_total} ciphersuites')
    print('\nRunning with options:')
    print(f'    -Starting security level: {tls_opts["sec_lvl"]}' +
          f'\n    -Ending security level: {tls_opts["max_sec_lvl"]}' +
          f'\n    -Starting input size: {tls_opts["msg_size"]} bytes' +
          f'\n    -Ending input size: {tls_opts["max_msg_size"]} bytes' +
          f'\n    -Number of tests: {tls_opts["n_tests"]}' +
          f'\n    -Data\'s directory: {tls_opts["path"]}'
          f'\n    -Generate statistics: {"No" if weight == False else "Yes"}')
    print('\n--- STARTING DATA ACQUISITION PROCESS ---')

    # Step 2: Compile libs and programs
    print(f'\nPreparing libraries and programs'.ljust(settings.strlen, '.'),
          end=' ',
          flush=True)
    pool = ThreadPool(processes=2)
    async_result_make = pool.apply_async(utils.make_progs, (target, ))
    make_ret = async_result_make.get()

    if make_ret != 0:
        sys.exit(2)

    for suite in total_ciphersuites:
        print(f'\nStarting analysis for: {suite} ({current}/{n_total})')
        current += 1
        tls_opts['ciphersuite'] = suite

        # Step 3: Start server in thread 1
        print('    Starting server'.ljust(settings.strlen, '.'),
              end=' ',
              flush=True)
        async_result_srv = pool.apply_async(run_srv, (target, tls_opts))
        print('ok')

        # Step 4: Start client in thread 2
        print('    Starting client'.ljust(settings.strlen, '.'),
              end=' ',
              flush=True)
        async_result_cli = pool.apply_async(run_cli, (target, tls_opts))
        print('ok')

        # Step 5: Verify result from server and client
        srv_ret = async_result_srv.get()
        cli_ret = async_result_cli.get()

        if srv_ret == 1 and cli_ret == 1:
            not_ciphersuites.append(suite)

        elif srv_ret != 0 or cli_ret != 0:
            error_ciphersuites.append(suite)

        else:
            print('\n    Data successfully obtained!!!')
            success_ciphersuites.append(suite)

    pool.close()
    pool.join()
    n_success = len(success_ciphersuites)
    n_not = len(not_ciphersuites)
    n_error = len(error_ciphersuites)

    if weight != False:
        # Step 6: Analyse data and create comparison plots for all ciphersuites that ended successfully
        print('\n--- STARTING DATA PLOTS GENERATION PROCESS ---')
        make_figs(tls_opts['path'],
                  suites_file,
                  success_ciphersuites,
                  weight,
                  handshake=handshake,
                  serv_set=serv_set)

        # Step 7: For each target, save successful ciphersuites in a file
        # utils.write_ciphersuites('services', success_ciphersuites)

    # Step 8: Report final status
    print('\n--- FINAL STATUS ---')
    print('\nData generation:')
    print(f'    -Number of ciphersuites: {n_total}')
    print(f'    -Number of successes: {n_success}')
    print(f'    -Number of n/a: {n_not}')
    print(f'    -Number of errors: {n_error}')

    if n_not > 0:
        print('    -N/A ciphersuites:')

        for suite in not_ciphersuites:
            print(f'        {suite}')

    if n_error > 0:
        print('    -Error ciphersuites:')

        for suite in error_ciphersuites:
            print(f'        {suite}')

    if weight != False:
        print('\nPlots generation:')
        print(f'    -Number of ciphersuites: {n_success}')

    print('\nData acquisition and analysis has ended.')
    print(
        f'You can check all the csv data in the docs/{tls_opts["path"]} directory',
        end='')

    if weight != False:
        print(
            f' and the generated plots and statistics in the tools/statistics/{tls_opts["path"]} '
            +
            f'and tools/results/{tls_opts["path"]} directories, respectively',
            end='')

    print('.')
Exemple #49
0
class CmdUpload(object):
    """ This class is responsible for uploading packages to remotes. The flow is:
    - Collect all the packages to be uploaded with the UploadCollecter
    - Execute the upload. For every ref:
        - Upload the recipe of the ref: "_upload_recipe"
            - If the policy is not FORCE, check the date ("_check_recipe_date"): if there
              are changes, do not allow uploading when the remote date is newer than the
              local cache one
            - Retrieve the sources (exports_sources) if they are not cached and we are
              uploading to a different remote: "complete_recipe_sources"
            - Gather files and create 2 .tgz (exports, exports_sources) with
              "_compress_recipe_files"
            - Decide which files have to be uploaded and deleted from the server
              based on the difference with the remote snapshot "_recipe_files_to_upload".
              This can raise if the upload policy is not overwrite
            - Execute the real transfer "remote_manager.upload_recipe()"
        - For every package_id of every ref: "_upload_package"
            - Gather files and create package.tgz. "_compress_package_files"
            - (Optional) Do the integrity check of the package
            - Decide which files to upload and which to delete from the server:
              "_package_files_to_upload". Can raise if the policy is NOT overwrite
            - Do the actual upload

    All the REVISIONS are locally defined, not retrieved from the servers

    This requires calling to the remote API methods:
    - get_recipe_sources() to get the export_sources if they are missing
    - get_recipe_snapshot() to do the diff and know what files to upload
    - get_package_snapshot() to do the diff and know what files to upload
    - get_recipe_manifest() to check the date and raise if policy requires
    - get_package_manifest() to raise if policy!=force and manifests change
    """
    def __init__(self, cache, user_io, remote_manager, loader, hook_manager):
        self._cache = cache
        self._user_io = user_io
        self._output = progress_bar.ProgressOutput(self._user_io.out)
        self._remote_manager = remote_manager
        self._loader = loader
        self._hook_manager = hook_manager
        self._upload_thread_pool = None
        self._exceptions_list = []

    def upload(self,
               reference_or_pattern,
               remotes,
               upload_recorder,
               package_id=None,
               all_packages=None,
               confirm=False,
               retry=None,
               retry_wait=None,
               integrity_check=False,
               policy=None,
               query=None,
               parallel_upload=False):
        t1 = time.time()

        collecter = _UploadCollecter(self._cache, self._user_io, self._output,
                                     self._loader)
        refs_by_remote = collecter.collect(package_id, reference_or_pattern,
                                           confirm, remotes, all_packages,
                                           query)

        if parallel_upload:
            self._user_io.disable_input()
        self._upload_thread_pool = ThreadPool(
            cpu_count() if parallel_upload else 1)

        for remote, refs in refs_by_remote.items():

            self._output.info("Uploading to remote '{}':".format(remote.name))

            def upload_ref(ref_conanfile_prefs):
                _ref, _conanfile, _prefs = ref_conanfile_prefs
                try:
                    self._upload_ref(_conanfile, _ref, _prefs, retry,
                                     retry_wait, integrity_check, policy,
                                     remote, upload_recorder, remotes)
                except BaseException as base_exception:
                    base_trace = traceback.format_exc()
                    self._exceptions_list.append(
                        (base_exception, _ref, base_trace, remote))

            self._upload_thread_pool.map(upload_ref,
                                         [(ref, conanfile, prefs)
                                          for (ref, conanfile, prefs) in refs])

        self._upload_thread_pool.close()
        self._upload_thread_pool.join()

        if len(self._exceptions_list) > 0:
            for exc, ref, trace, remote in self._exceptions_list:
                t = "recipe" if isinstance(ref,
                                           ConanFileReference) else "package"
                msg = "%s: Upload %s to '%s' failed: %s\n" % (
                    str(ref), t, remote.name, str(exc))
                if get_env("CONAN_VERBOSE_TRACEBACK", False):
                    msg += trace
                self._output.error(msg)
            raise ConanException("Errors uploading some packages")

        logger.debug("UPLOAD: Time manager upload: %f" % (time.time() - t1))

    def _upload_ref(self, conanfile, ref, prefs, retry, retry_wait,
                    integrity_check, policy, recipe_remote, upload_recorder,
                    remotes):
        """ Uploads the recipes and binaries identified by ref
        """
        assert (ref.revision
                is not None), "Cannot upload a recipe without RREV"
        conanfile_path = self._cache.package_layout(ref).conanfile()
        # FIXME: I think it makes no sense to specify a remote to "pre_upload"
        # FIXME: because the recipe can have one and the package a different one
        self._hook_manager.execute("pre_upload",
                                   conanfile_path=conanfile_path,
                                   reference=ref,
                                   remote=recipe_remote)
        msg = "\rUploading %s to remote '%s'" % (str(ref), recipe_remote.name)
        self._output.info(left_justify_message(msg))
        self._upload_recipe(ref, conanfile, retry, retry_wait, policy,
                            recipe_remote, remotes)
        upload_recorder.add_recipe(ref, recipe_remote.name, recipe_remote.url)

        # Now the binaries
        if prefs:
            total = len(prefs)
            p_remote = recipe_remote

            def upload_package_index(index_pref):
                index, pref = index_pref
                try:
                    up_msg = "\rUploading package %d/%d: %s to '%s'" % (
                        index + 1, total, str(pref.id), p_remote.name)
                    self._output.info(left_justify_message(up_msg))
                    self._upload_package(pref, retry, retry_wait,
                                         integrity_check, policy, p_remote)
                    upload_recorder.add_package(pref, p_remote.name,
                                                p_remote.url)
                except BaseException as pkg_exc:
                    trace = traceback.format_exc()
                    return pkg_exc, pref, trace, p_remote

            def upload_package_callback(ret):
                package_exceptions = [r for r in ret if r is not None]
                self._exceptions_list.extend(package_exceptions)
                if not package_exceptions:
                    # FIXME: I think it makes no sense to specify a remote to "post_upload"
                    # FIXME: because the recipe can have one and the package a different one
                    self._hook_manager.execute("post_upload",
                                               conanfile_path=conanfile_path,
                                               reference=ref,
                                               remote=recipe_remote)

            # This doesn't wait for the packages to end, so the function returns
            # and the "pool entry" for the recipe is released
            self._upload_thread_pool.map_async(
                upload_package_index,
                [(index, pref) for index, pref in enumerate(prefs)],
                callback=upload_package_callback)
        else:
            # FIXME: I think it makes no sense to specify a remote to "post_upload"
            # FIXME: because the recipe can have one and the package a different one
            self._hook_manager.execute("post_upload",
                                       conanfile_path=conanfile_path,
                                       reference=ref,
                                       remote=recipe_remote)

    def _upload_recipe(self, ref, conanfile, retry, retry_wait, policy, remote,
                       remotes):

        current_remote_name = self._cache.package_layout(
            ref).load_metadata().recipe.remote

        if remote.name != current_remote_name:
            complete_recipe_sources(self._remote_manager, self._cache,
                                    conanfile, ref, remotes)

        conanfile_path = self._cache.package_layout(ref).conanfile()
        self._hook_manager.execute("pre_upload_recipe",
                                   conanfile_path=conanfile_path,
                                   reference=ref,
                                   remote=remote)

        t1 = time.time()
        cache_files = self._compress_recipe_files(ref)

        with self._cache.package_layout(ref).update_metadata() as metadata:
            metadata.recipe.checksums = calc_files_checksum(cache_files)

        local_manifest = FileTreeManifest.loads(
            load(cache_files["conanmanifest.txt"]))

        remote_manifest = None
        if policy != UPLOAD_POLICY_FORCE:
            # Check SCM data for auto fields
            if hasattr(conanfile,
                       "scm") and (conanfile.scm.get("url") == "auto"
                                   or conanfile.scm.get("revision") == "auto"
                                   or conanfile.scm.get("type") is None
                                   or conanfile.scm.get("url") is None
                                   or conanfile.scm.get("revision") is None):
                raise ConanException(
                    "The recipe contains invalid data in the 'scm' attribute"
                    " (some 'auto' values or missing fields 'type', 'url' or"
                    " 'revision'). Use '--force' to ignore this error or export"
                    " again the recipe ('conan export' or 'conan create') to"
                    " fix these issues.")

            remote_manifest = self._check_recipe_date(ref, remote,
                                                      local_manifest)
        if policy == UPLOAD_POLICY_SKIP:
            return ref

        files_to_upload, deleted = self._recipe_files_to_upload(
            ref, policy, cache_files, remote, remote_manifest, local_manifest)

        if files_to_upload or deleted:
            self._remote_manager.upload_recipe(ref, files_to_upload, deleted,
                                               remote, retry, retry_wait)
            self._upload_recipe_end_msg(ref, remote)
        else:
            self._output.info("Recipe is up to date, upload skipped")
        duration = time.time() - t1
        log_recipe_upload(ref, duration, cache_files, remote.name)
        self._hook_manager.execute("post_upload_recipe",
                                   conanfile_path=conanfile_path,
                                   reference=ref,
                                   remote=remote)

        # The recipe wasn't in the registry or it has changed the revision field only
        if not current_remote_name:
            with self._cache.package_layout(ref).update_metadata() as metadata:
                metadata.recipe.remote = remote.name

        return ref

    def _upload_package(self,
                        pref,
                        retry=None,
                        retry_wait=None,
                        integrity_check=False,
                        policy=None,
                        p_remote=None):

        assert (pref.revision
                is not None), "Cannot upload a package without PREV"
        assert (pref.ref.revision
                is not None), "Cannot upload a package without RREV"

        pkg_layout = self._cache.package_layout(pref.ref)
        conanfile_path = pkg_layout.conanfile()
        self._hook_manager.execute("pre_upload_package",
                                   conanfile_path=conanfile_path,
                                   reference=pref.ref,
                                   package_id=pref.id,
                                   remote=p_remote)

        t1 = time.time()
        the_files = self._compress_package_files(pref, integrity_check)

        if policy == UPLOAD_POLICY_SKIP:
            return None
        files_to_upload, deleted = self._package_files_to_upload(
            pref, policy, the_files, p_remote)

        if files_to_upload or deleted:
            self._remote_manager.upload_package(pref, files_to_upload, deleted,
                                                p_remote, retry, retry_wait)
            logger.debug("UPLOAD: Time upload package: %f" %
                         (time.time() - t1))
        else:
            self._output.info("Package is up to date, upload skipped")

        duration = time.time() - t1
        log_package_upload(pref, duration, the_files, p_remote)
        self._hook_manager.execute("post_upload_package",
                                   conanfile_path=conanfile_path,
                                   reference=pref.ref,
                                   package_id=pref.id,
                                   remote=p_remote)

        logger.debug("UPLOAD: Time uploader upload_package: %f" %
                     (time.time() - t1))

        # Update the package metadata
        checksums = calc_files_checksum(the_files)
        with pkg_layout.update_metadata() as metadata:
            cur_package_remote = metadata.packages[pref.id].remote
            if not cur_package_remote:
                metadata.packages[pref.id].remote = p_remote.name
            metadata.packages[pref.id].checksums = checksums

        return pref

    def _compress_recipe_files(self, ref):
        export_folder = self._cache.package_layout(ref).export()

        for f in (EXPORT_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME):
            tgz_path = os.path.join(export_folder, f)
            if is_dirty(tgz_path):
                self._output.warn("%s: Removing %s, marked as dirty" %
                                  (str(ref), f))
                os.remove(tgz_path)
                clean_dirty(tgz_path)

        files, symlinks = gather_files(export_folder)
        if CONANFILE not in files or CONAN_MANIFEST not in files:
            raise ConanException("Cannot upload corrupted recipe '%s'" %
                                 str(ref))
        export_src_folder = self._cache.package_layout(ref).export_sources()
        src_files, src_symlinks = gather_files(export_src_folder)
        the_files = _compress_recipe_files(files, symlinks, src_files,
                                           src_symlinks, export_folder,
                                           self._output)

        return the_files

    def _compress_package_files(self, pref, integrity_check):

        t1 = time.time()
        # existing package, will use short paths if defined
        package_folder = self._cache.package_layout(
            pref.ref, short_paths=None).package(pref)

        if is_dirty(package_folder):
            raise ConanException("Package %s is corrupted, aborting upload.\n"
                                 "Remove it with 'conan remove %s -p=%s'" %
                                 (pref, pref.ref, pref.id))
        tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
        if is_dirty(tgz_path):
            self._output.warn("%s: Removing %s, marked as dirty" %
                              (str(pref), PACKAGE_TGZ_NAME))
            os.remove(tgz_path)
            clean_dirty(tgz_path)
        # Get all the files in that directory
        files, symlinks = gather_files(package_folder)

        if CONANINFO not in files or CONAN_MANIFEST not in files:
            logger.error("Missing info or manifest in uploading files: %s" %
                         (str(files)))
            raise ConanException("Cannot upload corrupted package '%s'" %
                                 str(pref))

        logger.debug("UPLOAD: Time remote_manager build_files_set : %f" %
                     (time.time() - t1))
        if integrity_check:
            self._package_integrity_check(pref, files, package_folder)
            logger.debug(
                "UPLOAD: Time remote_manager check package integrity : %f" %
                (time.time() - t1))

        the_files = _compress_package_files(files, symlinks, package_folder,
                                            self._output)
        return the_files

    def _recipe_files_to_upload(self, ref, policy, files, remote,
                                remote_manifest, local_manifest):
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_recipe_snapshot(ref, remote)
        if not remote_snapshot:
            return files, set()

        deleted = set(remote_snapshot).difference(files)
        if policy != UPLOAD_POLICY_FORCE:
            if remote_manifest is None:
                # This is the weird scenario, we have a snapshot but don't have a manifest.
                # Can be due to concurrency issues, so we can try to retrieve it now
                try:
                    remote_manifest, _ = self._remote_manager.get_recipe_manifest(
                        ref, remote)
                except NotFoundException:
                    # This is weird, the manifest is still not there; better upload everything
                    self._output.warn(
                        "The remote recipe doesn't have the 'conanmanifest.txt' "
                        "file and will be uploaded: '{}'".format(ref))
                    return files, deleted

            if remote_manifest == local_manifest:
                return None, None

            if policy in (UPLOAD_POLICY_NO_OVERWRITE,
                          UPLOAD_POLICY_NO_OVERWRITE_RECIPE):
                raise ConanException(
                    "Local recipe is different from the remote recipe. "
                    "Forbidden overwrite.")

        return files, deleted

    def _package_files_to_upload(self, pref, policy, the_files, remote):
        self._remote_manager.check_credentials(remote)
        remote_snapshot = self._remote_manager.get_package_snapshot(
            pref, remote)

        if remote_snapshot and policy != UPLOAD_POLICY_FORCE:
            if not is_package_snapshot_complete(remote_snapshot):
                return the_files, set()
            remote_manifest, _ = self._remote_manager.get_package_manifest(
                pref, remote)
            local_manifest = FileTreeManifest.loads(
                load(the_files["conanmanifest.txt"]))
            if remote_manifest == local_manifest:
                return None, None
            if policy == UPLOAD_POLICY_NO_OVERWRITE:
                raise ConanException(
                    "Local package is different from the remote package. Forbidden"
                    " overwrite.")
        deleted = set(remote_snapshot).difference(the_files)
        return the_files, deleted

    def _upload_recipe_end_msg(self, ref, remote):
        msg = "\rUploaded conan recipe '%s' to '%s'" % (str(ref), remote.name)
        url = remote.url.replace("https://api.bintray.com/conan",
                                 "https://bintray.com")
        msg += ": %s" % url
        self._output.info(left_justify_message(msg))

    def _package_integrity_check(self, pref, files, package_folder):
        # If package has been modified remove tgz to regenerate it
        self._output.rewrite_line("Checking package integrity...")

        # short_paths = None is enough if there exist short_paths
        layout = self._cache.package_layout(pref.ref, short_paths=None)
        read_manifest, expected_manifest = layout.package_manifests(pref)

        if read_manifest != expected_manifest:
            self._output.writeln("")
            diff = read_manifest.difference(expected_manifest)
            for fname, (h1, h2) in diff.items():
                self._output.warn(
                    "Mismatched checksum '%s' (manifest: %s, file: %s)" %
                    (fname, h1, h2))

            if PACKAGE_TGZ_NAME in files:
                tgz_path = os.path.join(package_folder, PACKAGE_TGZ_NAME)
                try:
                    os.unlink(tgz_path)
                except OSError:
                    pass
            error_msg = os.linesep.join(
                "Mismatched checksum '%s' (manifest: %s, file: %s)" %
                (fname, h1, h2) for fname, (h1, h2) in diff.items())
            logger.error("Manifests doesn't match!\n%s" % error_msg)
            raise ConanException("Cannot upload corrupted package '%s'" %
                                 str(pref))
        else:
            self._output.rewrite_line("Package integrity OK!")
        self._output.writeln("")

    def _check_recipe_date(self, ref, remote, local_manifest):
        try:
            remote_recipe_manifest, ref = self._remote_manager.get_recipe_manifest(
                ref, remote)
        except NotFoundException:
            return  # First time uploading this package

        if (remote_recipe_manifest != local_manifest
                and remote_recipe_manifest.time > local_manifest.time):
            self._print_manifest_information(remote_recipe_manifest,
                                             local_manifest, ref, remote)
            raise ConanException(
                "Remote recipe is newer than local recipe: "
                "\n Remote date: %s\n Local date: %s" %
                (remote_recipe_manifest.time, local_manifest.time))

        return remote_recipe_manifest

    def _print_manifest_information(self, remote_recipe_manifest,
                                    local_manifest, ref, remote):
        try:
            self._output.info("\n%s" % ("-" * 40))
            self._output.info("Remote manifest:")
            self._output.info(remote_recipe_manifest)
            self._output.info("Local manifest:")
            self._output.info(local_manifest)
            difference = remote_recipe_manifest.difference(local_manifest)
            if "conanfile.py" in difference:
                contents = load(self._cache.package_layout(ref).conanfile())
                endlines = "\\r\\n" if "\r\n" in contents else "\\n"
                self._output.info("Local 'conanfile.py' using '%s' line-ends" %
                                  endlines)
                remote_contents = self._remote_manager.get_recipe_path(
                    ref, path="conanfile.py", remote=remote)
                endlines = "\\r\\n" if "\r\n" in remote_contents else "\\n"
                self._output.info(
                    "Remote 'conanfile.py' using '%s' line-ends" % endlines)
            self._output.info("\n%s" % ("-" * 40))
        except Exception as e:
            self._output.info("Error printing information about the diff: %s" %
                              str(e))
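# A minimal, hedged sketch of the checksum-diff idea used by _package_integrity_check
# above: manifests are treated as plain dicts mapping file names to checksum strings.
# Conan's real FileTreeManifest/difference() API is not reproduced here; this is an
# illustrative stand-in only.
def manifest_difference(expected, actual):
    """Return {filename: (expected_checksum, actual_checksum)} for every mismatch."""
    diff = {}
    for fname in set(expected) | set(actual):
        h1, h2 = expected.get(fname), actual.get(fname)
        if h1 != h2:
            diff[fname] = (h1, h2)
    return diff

# Example: manifest_difference({"lib.a": "abc"}, {"lib.a": "abd"}) -> {"lib.a": ("abc", "abd")}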
Exemple #50
0
class Scheduler(MooseObject):
    """
    Base class for handling jobs asynchronously. To use this class, call .schedule()
    and supply a list of testers to schedule. Each group of testers supplied will begin
    running immediately.

    Syntax:
       .schedule([list of tester objects])

    A list of testers will be added to a queue and begin calling their derived run method.
    You can continue to add more testers to the queue in this fashion.

    Once all jobs have been scheduled, call .waitFinish() to wait until all jobs have
    finished.
    """

    @staticmethod
    def validParams():
        params = MooseObject.validParams()
        params.addRequiredParam('average_load',  64.0, "Average load to allow")
        params.addRequiredParam('max_processes', None, "Hard limit of maximum processes to use")
        params.addParam('min_reported_time', 10, "The minimum time elapsed before a job is reported as taking too long to run.")

        return params

    # This is what will be checked for when we look for valid schedulers
    IS_SCHEDULER = True

    def __init__(self, harness, params):
        MooseObject.__init__(self, harness, params)

        ## The test harness to run callbacks on
        self.harness = harness

        # Retrieve and store the TestHarness options for use in this object
        self.options = harness.getOptions()

        # The Scheduler class can be initialized with no "max_processes" argument and it'll default
        # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit.
        # The difference is whether or not we allow single jobs to exceed the number of slots.
        if params['max_processes'] is None:
            self.available_slots = 1
            self.soft_limit = True
        else:
            self.available_slots = params['max_processes'] # hard limit
            self.soft_limit = False

        self.average_load = params['average_load']

        self.min_report_time = params['min_reported_time']

        # Initialize run_pool based on available slots
        self.run_pool = ThreadPool(processes=self.available_slots)

        # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered)
        self.status_pool = ThreadPool(processes=1)

        # Slot lock when processing resource allocations and modifying slots_in_use
        self.slot_lock = threading.Lock()

        # Job lock when modifying a jobs status
        self.activity_lock = threading.Lock()

        # Job count lock when modifying incoming/outgoing jobs
        self.job_count_lock = threading.Lock()

        # The combined number of processors and threads (-j/-n) currently in use by running jobs
        self.slots_in_use = 0

        # Count of jobs which need to complete
        self.job_count = 0

        # Set containing all submitted jobs
        self.__job_bank = set([])

        # Total running Job and Test failures encountered
        self.__failures = 0

        # Allow threads to set a global exception
        self.__error_state = False

        # Private set of jobs currently running
        self.__active_jobs = set([])

        # Jobs that are taking longer to finish than the allotted time are reported back early to inform
        # the user 'stuff' is still running. Jobs entering this set will not be reported again.
        self.jobs_reported = set([])

        # The last time the scheduler reported something
        self.last_reported_time = clock()

        # Sets of threading objects created by jobs entering and exiting the queues. When scheduler.waitFinish()
        # is called, and both thread pools are empty, the pools shut down, and the call to waitFinish() returns.
        self.__status_pool_lock = threading.Lock()
        self.__runner_pool_lock = threading.Lock()
        self.__status_pool_jobs = set([])
        self.__runner_pool_jobs = set([])

        # True when scheduler.waitFinish() is called. This alerts the scheduler that no more jobs are
        # to be scheduled. KeyboardInterrupts are then handled by the thread pools.
        self.__waiting = False

    def triggerErrorState(self):
        self.__error_state = True
        self.run_pool.close()
        self.status_pool.close()

    def killRemaining(self, keyboard=False):
        """ Method to kill running jobs """
        with self.activity_lock:
            for job in self.__active_jobs:
                job.killProcess()
        if keyboard:
            self.triggerErrorState()
            self.harness.keyboard_interrupt()
        else:
            self.triggerErrorState()

    def retrieveJobs(self):
        """ return all the jobs the scheduler was tasked to perform work for """
        return self.__job_bank

    def schedulerError(self):
        """ boolean if the scheduler prematurely exited """
        return self.__error_state and not self.maxFailures()

    def maxFailures(self):
        """ Boolean for hitting max failures """
        return ((self.options.valgrind_mode and self.__failures >= self.options.valgrind_max_fails)
                or self.__failures >= self.options.max_fails)

    def run(self, job):
        """ Call derived run method """
        return

    def notifyFinishedSchedulers(self):
        """ Notify derived schedulers we are finished """
        return

    def augmentJobs(self, Jobs):
        """
        Allow derived schedulers to augment Jobs before they perform work.
        Note: This occurs before we perform a job count sanity check. So
        any additions or subtractions to the number of jobs will result in
        an exception.
        """
        return

    def waitFinish(self):
        """
        Inform the Scheduler there are no further jobs to schedule.
        Return once all jobs have completed.
        """
        self.__waiting = True
        try:
            # wait until there is an error, or until both queues are empty
            waiting_on_status_pool = True
            waiting_on_runner_pool = True

            while (waiting_on_status_pool or waiting_on_runner_pool) and self.job_count:

                if self.__error_state:
                    break

                with self.__status_pool_lock:
                    waiting_on_status_pool = sum(1 for x in self.__status_pool_jobs if not x.ready())
                with self.__runner_pool_lock:
                    waiting_on_runner_pool = sum(1 for x in self.__runner_pool_jobs if not x.ready())

                sleep(0.1)

            # Reporting sanity check
            if not self.__error_state and self.job_count:
                raise SchedulerError('Scheduler is exiting with a different number of jobs than it was tasked with!')

            if not self.__error_state:
                self.run_pool.close()
                self.run_pool.join()
                self.status_pool.close()
                self.status_pool.join()

            # allow derived schedulers to perform any exit routines
            self.notifyFinishedSchedulers()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)

    def schedule(self, testers):
        """
        Generate and submit a group of testers to a thread pool queue for execution.
        """
        # If we are not to schedule any more jobs for some reason, return now
        if self.__error_state:
            return

        # Instance our job DAG, create jobs, and a private lock for this group of jobs (testers)
        Jobs = JobDAG(self.options)
        j_dag = Jobs.createJobs(testers)
        j_lock = threading.Lock()

        # Allow derived schedulers access to the jobs before they launch
        self.augmentJobs(Jobs)

        # job-count to tester-count sanity check
        if j_dag.size() != len(testers):
            raise SchedulerError('Scheduler was going to run a different number of testers than it received (something bad happened)!')

        # Final reporting job-count sanity check
        with self.job_count_lock:
            self.job_count += j_dag.size()

        # Store all processed jobs in the global job bank
        self.__job_bank.update(j_dag.topological_sort())

        # Launch these jobs to perform work
        self.queueJobs(Jobs, j_lock)

    def queueJobs(self, Jobs, j_lock):
        """
        Determine which queue jobs should enter. Finished jobs are placed in the status
        pool to be printed while all others are placed in the runner pool to perform work.

        A finished job will trigger a change to the Job DAG, which will allow additional
        jobs to become available and ready to enter the runner pool (dependency jobs).
        """
        with j_lock:
            concurrent_jobs = Jobs.getJobsAndAdvance()
            for job in concurrent_jobs:
                if job.isFinished():
                    if not self.status_pool._state:
                        with self.__status_pool_lock:
                            self.__status_pool_jobs.add(self.status_pool.apply_async(self.jobStatus, (job, Jobs, j_lock)))

                elif job.isHold():
                    if not self.run_pool._state:
                        with self.__runner_pool_lock:
                            job.setStatus(job.queued)
                            self.__runner_pool_jobs.add(self.run_pool.apply_async(self.runJob, (job, Jobs, j_lock)))

    def getLoad(self):
        """ Method to return current load average """
        loadAverage = 0.0
        try:
            loadAverage = os.getloadavg()[0]
        except AttributeError:
            pass      # getloadavg() not available in this implementation of os
        return loadAverage

    def satisfyLoad(self):
        """ Method for controlling load average """
        while self.slots_in_use > 1 and self.getLoad() >= self.average_load:
            sleep(1.0)

    def reserveSlots(self, job, j_lock):
        """
        Method which allocates resources to perform the job. Returns a bool indicating
        whether the job should be allowed to run based on available resources.
        """
        # comply with load average
        if self.options.load:
            self.satisfyLoad()

        with self.slot_lock:
            can_run = False
            if self.slots_in_use + job.getSlots() <= self.available_slots:
                can_run = True

            # Check for insufficient slots -soft limit
            elif job.getSlots() > self.available_slots and self.soft_limit:
                job.addCaveats('OVERSIZED')
                can_run = True

            # Check for insufficient slots -hard limit (skip this job)
            elif job.getSlots() > self.available_slots and not self.soft_limit:
                job.addCaveats('insufficient slots')
                with j_lock:
                    job.setStatus(job.skip)

            if can_run:
                self.slots_in_use += job.getSlots()
        return can_run

    def handleTimeoutJob(self, job, j_lock):
        """ Handle jobs that have timed out """
        with j_lock:
            if job.isRunning():
                job.setStatus(job.crash, 'TIMEOUT')
                job.killProcess()

    def handleLongRunningJob(self, job, Jobs, j_lock):
        """ Handle jobs that have not reported in the alotted time """
        with self.__status_pool_lock:
            self.__status_pool_jobs.add(self.status_pool.apply_async(self.jobStatus, (job, Jobs, j_lock)))

    def jobStatus(self, job, Jobs, j_lock):
        """
        Instruct the TestHarness to print the status of job. This is a serial
        threaded operation, so as to prevent clobbering of text being printed
        to stdout.
        """
        if self.status_pool._state:
            return

        # It's possible the queue is just trying to empty
        try:
            job_was_running = False
            # Check if we should print due to inactivity
            with j_lock:
                if job.isRunning():
                    if job in self.jobs_reported:
                        return

                    # report inactivity if last reported time falls within tolerances
                    elif clock() - self.last_reported_time >= self.min_report_time:
                        job_was_running = True
                        job.addCaveats('FINISHED')

                        with self.activity_lock:
                            self.jobs_reported.add(job)

                    # TestHarness has not yet been inactive long enough to warrant a report
                    else:
                        # adjust the next report time based on delta of last report time
                        adjusted_interval = max(1, self.min_report_time - max(1, clock() - self.last_reported_time))
                        job.report_timer = threading.Timer(adjusted_interval,
                                                           self.handleLongRunningJob,
                                                           (job, Jobs, j_lock,))
                        job.report_timer.start()
                        return

            # Immediately following the Job lock, print the status
            self.harness.handleJobStatus(job)

            # Do last, to prevent premature thread pool closures
            with j_lock:
                tester = job.getTester()

                if not tester.isSilent():
                    self.last_reported_time = clock()

                if job.isFinished() and not job_was_running:
                    if tester.isFail():
                        self.__failures += 1

                    if self.maxFailures():
                        self.killRemaining()
                    else:
                        with self.job_count_lock:
                            self.job_count -= 1

        except Exception:
            print('statusWorker Exception: %s' % (traceback.format_exc()))
            self.killRemaining()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)

    def runJob(self, job, Jobs, j_lock):
        """ Method the run_pool calls when an available thread becomes ready """
        # It's possible the queue is just trying to empty. Allow it to do so
        # without generating overhead
        if self.__error_state:
            return

        try:
            # see if we have enough slots to start this job
            if self.reserveSlots(job, j_lock):
                with j_lock:
                    job.setStatus(job.running)

                with self.activity_lock:
                    self.__active_jobs.add(job)

                tester = job.getTester()
                timeout_timer = threading.Timer(float(tester.getMaxTime()),
                                                self.handleTimeoutJob,
                                                (job, j_lock,))

                job.report_timer = threading.Timer(self.min_report_time,
                                                   self.handleLongRunningJob,
                                                   (job, Jobs, j_lock,))

                job.report_timer.start()
                timeout_timer.start()
                self.run(job) # Hand execution over to derived scheduler
                timeout_timer.cancel()

                # Recover worker count before attempting to queue more jobs
                with self.slot_lock:
                    self.slots_in_use = max(0, self.slots_in_use - job.getSlots())

                # Stop the long running timer
                job.report_timer.cancel()

                # All done
                with j_lock:
                    job.setStatus(job.finished)

                with self.activity_lock:
                    self.__active_jobs.remove(job)

            # Not enough slots to run the job...
            else:
                # ...currently; place the job back on hold before re-entering it into the queue
                if not job.isFinished():
                    with j_lock:
                        job.setStatus(job.hold)
                    sleep(.1)

            # Job is done (or needs to re-enter the queue)
            self.queueJobs(Jobs, j_lock)

        except Exception:
            print('runWorker Exception: %s' % (traceback.format_exc()))
            self.killRemaining()

        except KeyboardInterrupt:
            self.killRemaining(keyboard=True)
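# A minimal, self-contained sketch of the two-pool pattern the Scheduler above is built on:
# a multi-threaded runner pool performs the work while a single-threaded status pool
# serializes all printing so messages never interleave. The do_work/report helpers are
# assumptions standing in for the derived run() method and the TestHarness callbacks.
import time
from multiprocessing.pool import ThreadPool

run_pool = ThreadPool(processes=4)
status_pool = ThreadPool(processes=1)

def report(msg):
    print(msg)

def do_work(i):
    time.sleep(0.1)                                   # stand-in for the real work
    status_pool.apply_async(report, ("finished job %d" % i,))

results = [run_pool.apply_async(do_work, (i,)) for i in range(8)]
for r in results:
    r.get()                                           # wait for the runner pool to finish

run_pool.close()
run_pool.join()
status_pool.close()
status_pool.join()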
Exemple #51
0
    def execute(self) -> typing.Optional[int]:
        """Execute the transfer."""
        # Validate arguments
        res = self.check_args(self.args)
        if res:  # pragma: nocover
            return res

        # Logger
        logger.info("Starting cubi-tk snappy %s", self.command_name)
        logger.info("  args: %s", self.args)

        # Fix for ngs_mapping & variant_calling vs step
        if self.step_name is None:
            self.step_name = self.args.step

        # Find biomedsheet file
        biomedsheet_tsv = get_biomedsheet_path(start_path=self.args.base_path,
                                               uuid=self.args.destination)

        # Extract library names from sample sheet
        sheet = load_sheet_tsv(biomedsheet_tsv, self.args.tsv_shortcut)
        library_names = list(
            self.yield_ngs_library_names(sheet=sheet,
                                         min_batch=self.args.first_batch,
                                         max_batch=self.args.last_batch))
        logger.info("Libraries in sheet:\n%s",
                    "\n".join(sorted(library_names)))

        lz_uuid, transfer_jobs = self.build_jobs(library_names)
        logger.debug("Transfer jobs:\n%s",
                     "\n".join(map(lambda x: x.to_oneline(), transfer_jobs)))

        if self.fix_md5_files:
            transfer_jobs = self._execute_md5_files_fix(transfer_jobs)

        total_bytes = sum([job.bytes for job in transfer_jobs])
        logger.info(
            "Transferring %d files with a total size of %s",
            len(transfer_jobs),
            sizeof_fmt(total_bytes),
        )
        counter = Value(c_ulonglong, 0)
        with tqdm.tqdm(total=total_bytes, unit="B", unit_scale=True) as t:
            if self.args.num_parallel_transfers == 0:  # pragma: nocover
                for job in transfer_jobs:
                    irsync_transfer(job, counter, t)
            else:
                pool = ThreadPool(processes=self.args.num_parallel_transfers)
                for job in transfer_jobs:
                    pool.apply_async(irsync_transfer, args=(job, counter, t))
                pool.close()
                pool.join()

        # Validate and move transferred files
        # Behaviour: If flag is True and lz uuid is not None*,
        # it will ask SODAR to validate and move transferred files.
        # (*) It can be None if the user provided a path
        if lz_uuid and self.args.validate_and_move:
            self.move_landing_zone(lz_uuid=lz_uuid)
        else:
            logger.info(
                "Transferred files will \033[1mnot\033[0m be automatically moved in SODAR."
            )

        logger.info("All done")
        return None
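# A hedged sketch of the parallel-transfer pattern in execute() above: a ThreadPool fans
# out transfer jobs while a shared counter and tqdm bar track transferred bytes. The
# fake_transfer helper and the byte sizes are assumptions standing in for irsync_transfer
# and the real cubi-tk transfer jobs.
import time
from ctypes import c_ulonglong
from multiprocessing import Value
from multiprocessing.pool import ThreadPool

import tqdm

def fake_transfer(nbytes, counter, bar):
    time.sleep(0.05)                     # stand-in for the actual irsync call
    with counter.get_lock():
        counter.value += nbytes
        bar.update(nbytes)

job_sizes = [10, 20, 30, 40]
counter = Value(c_ulonglong, 0)
with tqdm.tqdm(total=sum(job_sizes), unit="B", unit_scale=True) as bar:
    pool = ThreadPool(processes=2)
    for nbytes in job_sizes:
        pool.apply_async(fake_transfer, args=(nbytes, counter, bar))
    pool.close()
    pool.join()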
Exemple #52
0
def pred_eval_multiprocess(gpu_num,
                           key_predictors,
                           cur_predictors,
                           test_datas,
                           imdb,
                           cfg,
                           vis=False,
                           thresh=1e-3,
                           logger=None,
                           ignore_cache=True):

    if cfg.TEST.SEQ_NMS == False:
        if gpu_num == 1:
            res = [
                pred_eval(0, key_predictors[0], cur_predictors[0],
                          test_datas[0], imdb, cfg, vis, thresh, logger,
                          ignore_cache),
            ]
        else:
            from multiprocessing.pool import ThreadPool as Pool
            pool = Pool(processes=gpu_num)
            multiple_results = [
                pool.apply_async(pred_eval,
                                 args=(i, key_predictors[i], cur_predictors[i],
                                       test_datas[i], imdb, cfg, vis, thresh,
                                       logger, ignore_cache))
                for i in range(gpu_num)
            ]
            pool.close()
            pool.join()
            res = [r.get() for r in multiple_results]
        info_str = imdb.evaluate_detections_multiprocess(res)

    else:
        if gpu_num == 1:
            res = [
                pred_eval(0, key_predictors[0], cur_predictors[0],
                          test_datas[0], imdb, cfg, vis, thresh, logger,
                          ignore_cache),
            ]

        else:
            from multiprocessing.pool import ThreadPool as Pool

            pool = Pool(processes=gpu_num)
            multiple_results = [
                pool.apply_async(pred_eval,
                                 args=(i, key_predictors[i], cur_predictors[i],
                                       test_datas[i], imdb, cfg, vis, thresh,
                                       logger, ignore_cache))
                for i in range(gpu_num)
            ]
            pool.close()
            pool.join()
            res = [r.get() for r in multiple_results]

        from multiprocessing import Pool
        pool = Pool(processes=gpu_num)
        jobs = []
        res = []
        for i in range(gpu_num):
            job = apply_async(pool, pred_eval_seqnms, (i, imdb))
            jobs.append(job)
        for job in jobs:
            res.append(job.get())
        info_str = imdb.do_python_eval_gen(gpu_num)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
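# A minimal sketch of the per-GPU fan-out used by pred_eval_multiprocess above: one
# apply_async call per GPU index, then .get() on each AsyncResult to gather results in
# submission order. evaluate_on_gpu is a hypothetical stand-in for pred_eval.
from multiprocessing.pool import ThreadPool as Pool

def evaluate_on_gpu(gpu_id):
    return {"gpu": gpu_id, "mAP": 0.0}   # placeholder result

gpu_num = 4
pool = Pool(processes=gpu_num)
async_results = [pool.apply_async(evaluate_on_gpu, args=(i,)) for i in range(gpu_num)]
pool.close()
pool.join()
res = [r.get() for r in async_results]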
Exemple #53
0
def fold_and_score_pipeline(data):
    '''
    Required from pipeline: file type, filename, beam id, pointing id, directory

    '''
    tstart = time.time()
    output_dps = []
    dp_list = []

    processing_args = data['processing_args']
    output_dir = data['base_output_dir']
    processing_id = data['processing_id']

    # Make output dir
    try:
        subprocess.check_call("mkdir -p %s" % (output_dir), shell=True)
    except subprocess.CalledProcessError:
        log.info("Already made subdirectory")

    # Make temporary folder to keep any temporary outputs
    tmp_dir = '/beeond/PROCESSING/TEMP/%d' % processing_id
    try:
        subprocess.check_call("mkdir -p %s" % (tmp_dir), shell=True)
    except subprocess.CalledProcessError:
        log.info("Already made subdirectory")

    # Get the beam info
    for pointing in data["data"]["pointings"]:
        utc_start = pointing['utc_start']
        for beam in pointing["beams"]:
            input_fil_list = []
            for dp in (beam["data_products"]):
                if '.fil' in dp["filename"]:
                    input_fil_list.append(dp["filename"])
                elif '.tar.gz' in dp['filename']:
                    tarred_csv = dp["filename"]
                beam_ID = int(beam["id"])
                beam_name = beam["name"]

            input_fil_list.sort()
            input_filenames = ' '.join(input_fil_list)

            # Untar csv file
            untar_file(tarred_csv, tmp_dir)
            tmp_dir = tmp_dir + '/' + os.path.basename(tarred_csv)

            #Read candidate info file into Pandas Dataframe
            cand_file = glob.glob('%s/*good_cands_to_fold_with_beam.csv' %
                                  (tmp_dir))[0]
            df = pd.read_csv(cand_file)

            # Select only candidates with corresponding beam id and snr cutoff
            snr_cut_cands = df[
                df['snr'] > float(processing_args['snr_cutoff'])]
            single_beam_cands = snr_cut_cands[snr_cut_cands['beam_id'] ==
                                              beam_ID]
            single_beam_cands.sort_values('snr', inplace=True, ascending=False)

            #Limit number of candidates to fold
            if single_beam_cands.shape[0] > processing_args[
                    'cand_limit_per_beam']:
                single_beam_cands_fold_limited = single_beam_cands.head(
                    processing_args['cand_limit_per_beam'])
            else:
                single_beam_cands_fold_limited = single_beam_cands

            # Read parameters and fold
            cand_periods = single_beam_cands_fold_limited['period'].to_numpy()
            cand_accs = single_beam_cands_fold_limited['acc'].to_numpy()
            cand_dms = single_beam_cands_fold_limited['dm'].to_numpy()
            cand_ids = single_beam_cands_fold_limited[
                'cand_id_in_file'].to_numpy()
            xml_files = single_beam_cands_fold_limited['file'].to_numpy(
            )  # Choose first element. If filtered right, there should be just one xml filename throughout!

            tree = ET.parse(xml_files[0])
            root = tree.getroot()

            tsamp = float(root.find("header_parameters/tsamp").text)
            fft_size = float(root.find('search_parameters/size').text)
            no_of_samples = int(root.find("header_parameters/nsamples").text)

            mod_periods = []
            pdots = []
            for i in range(len(cand_periods)):
                Pdot = a_to_pdot(cand_periods[i], cand_accs[i])
                mod_periods.append(
                    period_modified(cand_periods[i], Pdot, no_of_samples,
                                    tsamp, fft_size))
                pdots.append(Pdot)

            cand_mod_periods = np.asarray(mod_periods, dtype=float)
            mask_path = '/beegfs/PROCESSING/TRAPUM/RFIFIND_masks/Fermi_409chans_mask/Fermi_beam0_052838_20200704_rfifind.mask'

            #Parallel process the folds
            no_of_cands = len(cand_mod_periods)

            command_list = []
            for i in range(no_of_cands):
                folding_packet = {}
                folding_packet['period'] = cand_mod_periods[i]
                folding_packet['acc'] = cand_accs[i]
                folding_packet['pdot'] = pdots[i]
                folding_packet['dm'] = cand_dms[i]
                output_name = "%s_%s_candidate_no_%03d_dm_%.2f_acc_%.2f" % (
                    beam_name, utc_start, cand_ids[i], folding_packet['dm'],
                    folding_packet['acc'])
                script = "prepfold -ncpus 1 -nsub 256 -mask %s -noxwin -topo -p %s -pd %s -dm %s %s -o %s" % (
                    mask_path, str(folding_packet['period']),
                    str(folding_packet['pdot']), str(
                        folding_packet['dm']), input_filenames, output_name)
                command_list.append(script)

            pool = ThreadPool(multiprocessing.cpu_count())
            for command in command_list:
                pool.apply_async(execute_command, args=(command, tmp_dir))

            pool.close()
            pool.join()

            log.info(
                "Folding done for all candidates. Scoring all candidates...")
            subprocess.check_call("python2 webpage_score.py --in_path=%s" %
                                  tmp_dir,
                                  shell=True)
            log.info("Scoring done...")

            #Create tar file of tmp directory in output directory
            subprocess.check_call("rm *.csv", shell=True,
                                  cwd=tmp_dir)  # Remove the csv files
            log.info("Tarring up all folds and the score file")
            tar_name = os.path.basename(output_dir) + "folds_and_scores.tar.gz"
            make_tarfile(output_dir, tmp_dir, tar_name)
            log.info("Tarred")

            # Remove contents in temporary directory
            remove_dir(tmp_dir)
            log.info("Removed temporary files")

            # Add tar file to dataproduct
            dp = dict(type="fold_tar_file",
                      filename=tar_name,
                      directory=output_dir,
                      beam_id=beam_ID,
                      pointing_id=pointing["id"],
                      metainfo=json.dumps("tar_file:folded_archives"))

            output_dps.append(dp)

    tend = time.time()
    print("Time taken is : %f s" % (tend - tstart))
    return output_dps
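# A hedged sketch of the command fan-out used by fold_and_score_pipeline above: each
# prepfold-style command line is handed to a ThreadPool worker that runs it inside a
# working directory. execute_command here mirrors the helper the pipeline assumes but
# does not show; the echo commands are placeholders.
import multiprocessing
import subprocess
from multiprocessing.pool import ThreadPool

def execute_command(command, cwd):
    subprocess.check_call(command, shell=True, cwd=cwd)

command_list = ["echo fold candidate 1", "echo fold candidate 2"]
pool = ThreadPool(multiprocessing.cpu_count())
for command in command_list:
    pool.apply_async(execute_command, args=(command, "."))
pool.close()
pool.join()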
Exemple #54
0
class Scheduler(MooseObject):
    """
    Base class for handling jobs asynchronously. To use this class, call .schedule()
    and supply a list of testers to schedule. Each group of testers supplied will begin
    running immediately.

    Syntax:
       .schedule([list of tester objects])

    A list of testers will be added to a queue and begin calling their derived run method.
    You can continue to add more testers to the queue in this fashion.

    Once you schedule all the testers you wish to test, call .waitFinish() to wait until
    all testers have finished.

    """

    @staticmethod
    def validParams():
        params = MooseObject.validParams()
        params.addRequiredParam('average_load',  64.0, "Average load to allow")
        params.addRequiredParam('max_processes', None, "Hard limit of maximum processes to use")

        return params

    def __init__(self, harness, params):
        MooseObject.__init__(self, harness, params)

        ## The test harness to run callbacks on
        self.harness = harness

        # Retrieve and store the TestHarness options for use in this object
        self.options = harness.getOptions()

        # The Scheduler class can be initialized with no "max_processes" argument and it'll default
        # to a soft limit. If however a max_processes is passed we'll treat it as a hard limit.
        # The difference is whether or not we allow single jobs to exceed the number of slots.
        if params['max_processes'] is None:
            self.available_slots = 1
            self.soft_limit = True
        else:
            self.available_slots = params['max_processes'] # hard limit
            self.soft_limit = False

        # Requested average load level to stay below
        self.average_load = params['average_load']

        # The time the status queue reported no activity to the TestHarness
        self.last_reported = clock()

        # A set containing jobs that have been reported
        self.jobs_reported = set([])

        # Initialize run_pool based on available slots
        self.run_pool = ThreadPool(processes=self.available_slots)

        # Initialize status_pool to only use 1 process (to prevent status messages from getting clobbered)
        self.status_pool = ThreadPool(processes=1)

        # Slot Lock when processing resource allocations
        self.slot_lock = threading.Lock()

        # DAG Lock when processing the DAG
        self.dag_lock = threading.Lock()

        # Workers in use (single job might request multiple slots)
        self.slots_in_use = 0

        # Jobs waiting to finish (includes actively running jobs)
        self.job_queue_count = 0

        # Set containing our TesterData containers. We use this in the event of a KeyboardInterrupt to
        # iterate over and kill any subprocesses
        self.tester_datas = set([])

    def killRemaining(self):
        """
        Method to kill any running subprocess started by the Scheduler. This also
        closes the status pool to prevent further statuses from printing to the
        screen.
        """
        self.run_pool.close()
        self.status_pool.close()

        for tester_data in self.tester_datas:
            tester_data.killProcess()
        self.job_queue_count = 0

    def run(self, job_container):
        """ Call derived run method """
        return

    def skipPrereqs(self):
        """
        Method to return boolean to skip dependency prerequisites checks.
        """
        if self.options.ignored_caveats:
            if 'all' in self.options.ignored_caveats or 'prereq' in self.options.ignored_caveats:
                return True
        return False

    def processDownstreamTests(self, job_container):
        """
        Method to discover and delete downstream jobs due to supplied job failing.
        """
        with self.dag_lock:
            failed_job_containers = set([])
            tester = job_container.getTester()
            job_dag = job_container.getDAG()
            if (tester.isFinished() and not tester.didPass() and not tester.isSilent() and not self.skipPrereqs()) \
                or (self.options.dry_run and not tester.isSilent()):

                # Ask the DAG to delete and return the downstream jobs associated with this job
                failed_job_containers.update(job_dag.delete_downstreams(job_container))

            for failed_job in failed_job_containers:
                tester = failed_job.getTester()
                tester.setStatus('skipped dependency', tester.bucket_skip)

        return failed_job_containers

    def buildDAG(self, job_container_dict, job_dag):
        """
        Build the DAG and catch any failures.
        """

        failed_or_skipped_testers = set([])

        # Create DAG independent nodes
        for tester_name, job_container in job_container_dict.iteritems():
            tester = job_container.getTester()

            # If this tester is not runnable, continue to the next tester
            if tester.getRunnable(self.options):

                job_dag.add_node_if_not_exists(job_container)

            else:
                failed_or_skipped_testers.add(tester)
                continue

        # Create edge nodes
        for tester_name, job_container in job_container_dict.iteritems():
            tester = job_container.getTester()

            # Add the prereq node and edges
            for prereq in tester.getPrereqs():

                try:
                    # Try to produce a KeyError and capture an unknown dependency
                    job_container_dict[prereq]

                    # Try to produce either a cyclic or skipped dependency error using the DAG's
                    # built-in exception methods
                    job_dag.add_edge(job_container_dict[prereq], job_container)

                # Skipped Dependencies
                except dag.DAGEdgeIndError:
                    if not self.skipPrereqs():
                        tester.setStatus('skipped dependency', tester.bucket_skip)
                        failed_or_skipped_testers.add(tester)

                    # Add the parent node / dependency edge to create a functional DAG now that we have caught
                    # the skipped dependency (needed for discovering race conditions later on)
                    job_dag.add_node_if_not_exists(job_container_dict[prereq])
                    job_dag.add_edge(job_container_dict[prereq], job_container)

                # Cyclic Failure
                except dag.DAGValidationError:
                    tester.setStatus('Cyclic or Invalid Dependency Detected!', tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Unknown Dependency Failure
                except KeyError:
                    tester.setStatus('unknown dependency', tester.bucket_fail)
                    failed_or_skipped_testers.add(tester)

                # Skipped/Silent/Deleted Testers fall into this category, caused by 'job_container' being skipped
                # during the first iteration above
                except dag.DAGEdgeDepError:
                    pass

        # With a working DAG created above (even a partial one), discover race conditions with remaining runnable
        # testers.
        failed_or_skipped_testers.update(self.checkRaceConditions(job_dag))

        return failed_or_skipped_testers

    def checkRaceConditions(self, dag_object):
        """
        Return a set of failing testers exhibiting race conditions with their
        output file.
        """
        failed_or_skipped_testers = set([])

        # clone the dag so we can operate destructively on the cloned dag
        dag_clone = dag_object.clone()

        while dag_clone.size():
            output_files_in_dir = set()

            # Get a list of concurrent job containers
            concurrent_jobs = dag_clone.ind_nodes()

            for job_container in concurrent_jobs:
                tester = job_container.getTester()
                output_files = tester.getOutputFiles()

                # check if we have colliding output files
                if len(output_files_in_dir.intersection(set(output_files))):

                    # Fail this concurrent group of testers
                    for this_job in concurrent_jobs:
                        tester = this_job.getTester()
                        tester.setStatus('OUTFILE RACE CONDITION', tester.bucket_fail)
                        failed_or_skipped_testers.add(tester)

                    # collisions detected, move on to the next set
                    break

                output_files_in_dir.update(output_files)

            # Delete this group of job containers and allow the loop to continue
            for job_container in concurrent_jobs:
                dag_clone.delete_node(job_container)

        return failed_or_skipped_testers

    def schedule(self, testers):
        """
        Schedule supplied list of testers for execution.
        """
        # If any threads caused an exception, we have already closed down the queue and must not
        # schedule any more jobs
        if self.run_pool._state:
            return

        # Instance the DAG class so we can share it amongst all the TesterData containers
        job_dag = dag.DAG()

        non_runnable_jobs = set([])
        name_to_job_container = {}

        # Increment our simple queue count with the number of testers the scheduler received
        with self.slot_lock:
            self.job_queue_count += len(testers)

        # Create a local dictionary of tester names to job containers, and add each container to the
        # tester_datas set. We will use this set as a way to gain access to their methods.
        for tester in testers:
            name_to_job_container[tester.getTestName()] = TesterData(tester, job_dag, self.options)
            self.tester_datas.add(name_to_job_container[tester.getTestName()])

        # Populate job_dag with testers. This method will also return any testers which caused failures
        # while building the DAG.
        skipped_or_failed_testers = self.buildDAG(name_to_job_container, job_dag)

        # Create a set of failing job containers
        for failed_tester in skipped_or_failed_testers:
            non_runnable_jobs.add(name_to_job_container[failed_tester.getTestName()])

        # Iterate over the jobs in our non_runnable_jobs and handle any downstream jobs affected by
        # 'job'. These will be our 'skipped dependency' tests.
        for job in non_runnable_jobs.copy():
            additionally_skipped = self.processDownstreamTests(job)
            non_runnable_jobs.update(additionally_skipped)
            job_dag.delete_node_if_exists(job)

        # Get a count of all the items still in the DAG. These will be the jobs that ultimately are queued
        runnable_jobs = job_dag.size()

        # Make sure we didn't drop a tester somehow
        if len(non_runnable_jobs) + runnable_jobs != len(testers):
            raise SchedulerError('Runnable tests plus skipped tests do not match the total scheduled test count!')

        # Assign a status thread to begin work on any skipped/failed jobs
        self.queueJobs(status_jobs=non_runnable_jobs)

        # Build our list of runnable jobs and set the tester's status to queued
        job_list = []
        if runnable_jobs:
            job_list = job_dag.ind_nodes()
            for job_container in job_list:
                tester = job_container.getTester()
                tester.setStatus('QUEUED', tester.bucket_pending)

        # Queue runnable jobs
        self.queueJobs(run_jobs=job_list)

    def waitFinish(self):
        """
        Block while the job queue is not empty. Once empty, this method will begin closing down
        the thread pools and perform a join. Once the last thread exits, we return from this
        method.

        There are two thread pools in play; the Tester pool which is performing all the tests,
        and the Status pool which is handling the printing of tester statuses. Because the
        Status pool will always have the last item needing to be 'printed', we close and join
        the Tester pool first, and then we do the same to the Status pool.
        """
        while self.job_queue_count > 0:
            sleep(0.5)

        self.run_pool.close()
        self.run_pool.join()
        self.status_pool.close()
        self.status_pool.join()

    def handleLongRunningJobs(self, job_container):
        """ Handle jobs that have not reported in alotted time """
        if job_container not in self.jobs_reported:
            tester = job_container.getTester()
            tester.setStatus('RUNNING...', tester.bucket_pending)
            self.queueJobs(status_jobs=[job_container])

            # Restart the reporting timer for this job
            job_container.report_timer = threading.Timer(float(tester.getMinReportTime()),
                                                         self.handleLongRunningJobs,
                                                         (job_container,))

            job_container.report_timer.start()

    def handleTimeoutJobs(self, job_container):
        """ Handle jobs that have timed out """
        tester = job_container.getTester()
        tester.setStatus('TIMEOUT', tester.bucket_fail)
        job_container.killProcess()

    def getLoad(self):
        """ Method to return current load average """
        loadAverage = 0.0
        try:
            loadAverage = os.getloadavg()[0]
        except AttributeError:
            pass      # getloadavg() not available in this implementation of os
        return loadAverage

    def satisfyLoad(self):
        """ Method for controlling load average """
        while self.slots_in_use > 1 and self.getLoad() >= self.average_load:
            sleep(1.0)

    def reserveSlots(self, job_container):
        """
        Method which allocates resources to perform the job. Returns a bool indicating
        whether the job should be allowed to run.
        """
        tester = job_container.getTester()

        # comply with load average
        if self.options.load:
            self.satisfyLoad()

        with self.slot_lock:
            can_run = False
            if self.slots_in_use + tester.getProcs(self.options) <= self.available_slots:
                can_run = True

            # Check for insufficient slots -soft limit
            # TODO: Create a unit test for this case
            elif tester.getProcs(self.options) > self.available_slots and self.soft_limit:
                tester.specs.addParam('caveats', ['OVERSIZED'], "")
                can_run = True

            # Check for insufficient slots -hard limit (skip this job)
            # TODO: Create a unit test for this case
            elif tester.getProcs(self.options) > self.available_slots and not self.soft_limit:
                tester.setStatus('insufficient slots', tester.bucket_skip)
                can_run = False

            if can_run:
                self.slots_in_use += tester.getProcs(self.options)

        return can_run

    def getNextJobGroup(self, job_container):
        """
        Method to delete the current finished job from the DAG and return the next
        list of individually runnable jobs.
        """
        with self.dag_lock:
            job_dag = job_container.getDAG()
            next_job_list = []

            # Delete this job from the shared DAG
            job_dag.delete_node(job_container)

            # Get next available job list
            concurrent_jobs = job_dag.ind_nodes()

            for next_job_container in concurrent_jobs:
                queued_tester = next_job_container.getTester()

                # Verify this job is not already running/pending/skipped
                if queued_tester.isInitialized():
                    # Set this next new job to pending so as to prevent this job from being launched a second time
                    queued_tester.setStatus('QUEUED', queued_tester.bucket_pending)
                    next_job_list.append(next_job_container)

        return next_job_list

    def queueJobs(self, status_jobs=[], run_jobs=[]):
        """
        Method to control which thread pool jobs enter.
        Syntax:

           To have a job(s) display its current status to the screen:
           .queueJobs(status_jobs=[job_container_list])

           To begin running job(s):
           .queueJobs(run_jobs=[job_container_list])

        """
        for job_container in run_jobs:
            if not self.run_pool._state:
                self.run_pool.apply_async(self.runWorker, (job_container,))

        for job_container in status_jobs:
            if not self.status_pool._state:
                self.status_pool.apply_async(self.statusWorker, (job_container,))

    def statusWorker(self, job_container):
        """ Method the status_pool calls when an available thread becomes ready """
        # Wrap entire statusWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()

            # If the job is still running for a long period of time and we have not reported
            # this same job already, report it now.
            if tester.isPending():
                if clock() - self.last_reported >= float(tester.getMinReportTime()) and job_container not in self.jobs_reported:
                    # Inform the TestHarness of a long running test (RUNNING...)
                    self.harness.handleTestStatus(job_container)

                    # ...And then set the finished caveat now that the running status has printed
                    tester.specs.addParam('caveats', ['FINISHED'], "")

                    # Add this job to the reported container so it does not happen again
                    self.jobs_reported.add(job_container)

                # Job is 'Pending', but is under the threshold to be reported (return now so
                # last_reported time does not get updated). This will ensure that if nothing
                # has happened between 'now' and another occurrence of our thread timer event
                # we do report it.
                else:
                    return

            else:
                # All other statuses are sent unmolested
                self.harness.handleTestStatus(job_container)

            # Decrement the job queue count now that this job has finished
            if tester.isFinished():
                with self.slot_lock:
                    self.job_queue_count -= 1

            # Record current reported time only if it is an activity the user will see
            if not tester.isSilent() or not tester.isDeleted():
                self.last_reported = clock()

        except Exception as e:
            print 'statusWorker Exception: %s' % (e)
            self.killRemaining()

    def runWorker(self, job_container):
        """ Method the run_pool calls when an available thread becomes ready """
        # Wrap the entire runWorker thread inside a try/exception to catch thread errors
        try:
            tester = job_container.getTester()
            # Check if there are enough resources to run this job
            if self.reserveSlots(job_container):

                # Start long running timer
                job_container.report_timer = threading.Timer(float(tester.getMinReportTime()),
                                                             self.handleLongRunningJobs,
                                                             (job_container,))
                job_container.report_timer.start()

                # Start timeout timer
                timeout_timer = threading.Timer(float(tester.getMaxTime()),
                                          self.handleTimeoutJobs,
                                          (job_container,))
                timeout_timer.start()

                # Call the derived run method
                self.run(job_container)

                # Stop timers now that the job has finished on its own
                job_container.report_timer.cancel()
                timeout_timer.cancel()

                # Derived run needs to set a non-pending status of some sort.
                if tester.isPending():
                    raise SchedulerError('Derived Scheduler can not return a pending status!')

                # Determine if this job creates any skipped dependencies (if it failed), and send
                # this new list of jobs to the status queue to be printed.
                possibly_skipped_job_containers = self.processDownstreamTests(job_container)
                possibly_skipped_job_containers.add(job_container)
                self.queueJobs(status_jobs=possibly_skipped_job_containers)

                # Get next job list
                next_job_group = self.getNextJobGroup(job_container)

                # Recover worker count before attempting to queue more jobs
                with self.slot_lock:
                    self.slots_in_use = max(0, self.slots_in_use - tester.getProcs(self.options))

                # Queue this new batch of runnable jobs
                self.queueJobs(run_jobs=next_job_group)

            # Not enough slots to run the job, currently
            else:
                # There will never be enough slots to run this job (insufficient slots)
                if tester.isFinished():
                    failed_downstream = self.processDownstreamTests(job_container)
                    failed_downstream.add(job_container)
                    self.queueJobs(status_jobs=failed_downstream)

                # There are no available slots, currently. Place back in queue, and sleep for a bit
                else:
                    self.queueJobs(run_jobs=[job_container])
                    sleep(0.3)

        except Exception as e:
            print 'runWorker Exception: %s' % (e)
            self.killRemaining()
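# A minimal sketch of the timer pattern both Scheduler variants rely on: a threading.Timer
# fires if a job runs longer than min_report_time and prints a "still running" notice, and
# is cancelled when the job finishes first. run_job is a placeholder for the derived run()
# method; the sleep calls stand in for real work.
import threading
import time

def report_long_running(name):
    print("%s is still running..." % name)

def run_job(name, duration, min_report_time=1.0):
    report_timer = threading.Timer(min_report_time, report_long_running, (name,))
    report_timer.start()
    time.sleep(duration)                 # the actual work
    report_timer.cancel()                # no notice if the job beat the timer
    print("%s finished" % name)

run_job("quick_test", 0.2)               # finishes before the timer fires
run_job("slow_test", 2.0)                # triggers the RUNNING...-style notice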
Exemple #55
0
    def execute_sqls_threaded(self, sql_queries, thread_pool_size=5):
        """ executes a array of SQLs using threads and returns results, useful for threaded batch operations
        Parameters:
        sql_queries array of SQL queries to execute
        thread_pool_size pool size to use, MAX a/c limit in PROD is 50 so its recommended to keep it around 2-5.

        Returns:
        True if all SQLs have been executed successfully, else False
        """
        if len(sql_queries) == 0:
            return True

        start_time = time.time()

        if (thread_pool_size < 1):
            thread_pool_size = 1
        POOL_SIZE = thread_pool_size

        if (len(sql_queries) < POOL_SIZE):
            POOL_SIZE = len(sql_queries)
        # Make the Pool of workers
        pool = ThreadPool(POOL_SIZE)

        print("Using pool size of {}".format(POOL_SIZE))

        count = 0
        failed_count = 0
        OPERATIONS = len(sql_queries)
        result = True
        while ((count + failed_count) < OPERATIONS):
            # print(count,failed_count,OPERATIONS)
            try:
                for i, r in enumerate(
                        pool.imap_unordered(self.start_query_execution_and_wait_for_completion, sql_queries), 1):
                    try:
                        # print(i,r)

                        if r is None:
                            failed_count = failed_count + 1
                            result = False
                        elif "SUCCESS" in r and r["SUCCESS"] == False:
                            failed_count = failed_count + 1
                            result = False
                            # break
                        else:
                            print(r["QUERY"])
                            count += 1
                            # your code
                        # elapsed_time = time.time() - start_time
                        # sys.stderr.write('\r{0:%} {} {}'.format((count*1.0/OPERATIONS),count,elapsed_time))
                        sys.stderr.write(
                            '\r{0:%} completed {1}, failed {2}, TOTAL: {3}'.format((count * 1.0 / OPERATIONS), count,
                                                                                   failed_count, OPERATIONS))
                    except Exception as e:
                        # print(traceback.format_exc())
                        print("#", str(e))
                        failed_count += 1
                        # print('#',sys.exc_info()[1])
                        # pass
            except Exception as e:
                # print(traceback.format_exc())
                print(str(e))
                failed_count += 1
                # print('$',sys.exc_info()[1])
                pass
        print("test_threaded_metric_log --- %s seconds ---for %s get ops using %s threads" % (
        (time.time() - start_time), OPERATIONS, POOL_SIZE))
        print("total: " + str(OPERATIONS) + ", failed: " + str(failed_count))

        # close the pool and wait for the work to finish
        pool.close()
        pool.join()

        if result and count == OPERATIONS:
            print("Operation successful")
            return True
        else:
            print("Operation had errors")
            raise Exception("Operation had errors")
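# A hedged sketch of the imap_unordered progress loop used by execute_sqls_threaded above:
# results arrive as workers finish and a counter is written to stderr after each one.
# run_query is an assumed stand-in for start_query_execution_and_wait_for_completion.
import sys
from multiprocessing.pool import ThreadPool

def run_query(sql):
    return {"QUERY": sql, "SUCCESS": True}    # pretend every query succeeds

queries = ["SELECT 1", "SELECT 2", "SELECT 3"]
pool = ThreadPool(min(5, len(queries)))
done = 0
for result in pool.imap_unordered(run_query, queries):
    done += 1
    sys.stderr.write("\r{0:%} completed".format(done * 1.0 / len(queries)))
pool.close()
pool.join()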
Exemple #56
0
class OrderedEnqueuer(SequenceEnqueuer):
    """Builds a Enqueuer from a Sequence.

  Used in `fit_generator`, `evaluate_generator`, `predict_generator`.

  Arguments:
      sequence: A `keras.utils.data_utils.Sequence` object.
      use_multiprocessing: use multiprocessing if True, otherwise threading
      scheduling: Sequential querying of data if 'sequential', random
        otherwise.
      shuffle: Whether to shuffle the data at the beginning of each epoch.
  """
    def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
        self.sequence = sequence
        self.use_multiprocessing = use_multiprocessing
        self.shuffle = shuffle
        self.workers = 0
        self.executor = None
        self.queue = None
        self.run_thread = None
        self.stop_signal = None

    def is_running(self):
        return self.stop_signal is not None and not self.stop_signal.is_set()

    def start(self, workers=1, max_queue_size=10):
        """Start the handler's workers.

    Arguments:
        workers: number of worker threads
        max_queue_size: queue size
            (when full, workers could block on `put()`)
    """
        if self.use_multiprocessing:
            self.executor = multiprocessing.Pool(workers)
        else:
            self.executor = ThreadPool(workers)
        self.queue = queue.Queue(max_queue_size)
        self.stop_signal = threading.Event()
        self.run_thread = threading.Thread(target=self._run)
        self.run_thread.daemon = True
        self.run_thread.start()

    def _run(self):
        """Submits requests to the executor and queues the `Future` objects."""
        sequence = list(range(len(self.sequence)))
        while True:
            if self.shuffle:
                random.shuffle(sequence)
            for i in sequence:
                if self.stop_signal.is_set():
                    return
                self.queue.put(self.executor.apply_async(
                    get_index, (self.sequence, i)),
                               block=True)
            self.sequence.on_epoch_end()

    def get(self):
        """Creates a generator to extract data from the queue.

    Skip the data if it is `None`.

    Yields:
        Tuples (inputs, targets)
            or (inputs, targets, sample_weights)
    """
        try:
            while self.is_running():
                inputs = self.queue.get(block=True).get()
                if inputs is not None:
                    yield inputs
        except Exception as e:
            self.stop()
            raise StopIteration(e)

    def stop(self, timeout=None):
        """Stops running threads and wait for them to exit, if necessary.

    Should be called by the same thread which called `start()`.

    Arguments:
        timeout: maximum time to wait on `thread.join()`
    """
        self.stop_signal.set()
        with self.queue.mutex:
            self.queue.queue.clear()
            self.queue.unfinished_tasks = 0
            self.queue.not_full.notify()
        self.executor.close()
        self.executor.join()
        self.run_thread.join(timeout)
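# A minimal sketch of the OrderedEnqueuer idea above: a background thread submits work to a
# ThreadPool and puts the AsyncResult handles on a bounded Queue, while the consumer calls
# .get() on them in order. load_batch is a placeholder for get_index(sequence, i).
import queue
import threading
from multiprocessing.pool import ThreadPool

def load_batch(i):
    return ("inputs_%d" % i, "targets_%d" % i)

executor = ThreadPool(2)
q = queue.Queue(maxsize=4)
stop_signal = threading.Event()

def producer(n):
    for i in range(n):
        if stop_signal.is_set():
            return
        q.put(executor.apply_async(load_batch, (i,)), block=True)
    q.put(None)                          # sentinel: no more batches

threading.Thread(target=producer, args=(8,), daemon=True).start()
while True:
    item = q.get(block=True)
    if item is None:
        break
    print(item.get())                    # blocks until that batch is ready
executor.close()
executor.join()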
Exemple #57
0
    def convert_dataset(self,
                        dataset,
                        to_format,
                        local_path,
                        conversion_func=None,
                        filters=None,
                        annotation_filter=None):
        """
        Convert entire dataset

        :param annotation_filter:
        :param dataset:
        :param to_format:
        :param local_path:
        :param conversion_func: Custom conversion service
        :param filters: optional
        :return:
        """
        if to_format.lower() == 'coco':
            return self.__convert_dataset_to_coco(
                dataset=dataset,
                local_path=local_path,
                filters=filters,
                annotation_filter=annotation_filter)
        num_workers = 6
        assert isinstance(dataset, entities.Dataset)
        self.dataset = dataset

        # download annotations
        if annotation_filter is None:
            dataset.download_annotations(local_path=local_path, overwrite=True)
        local_annotations_path = os.path.join(local_path, "json")
        output_annotations_path = os.path.join(local_path, to_format)
        pool = ThreadPool(processes=num_workers)
        i_item = 0
        pages = dataset.items.list(filters=filters)

        # if yolo - create labels file
        if to_format == 'yolo':
            labels = [label.tag for label in dataset.labels]
            with open('{}/{}.names'.format(local_path, dataset.name),
                      'w') as fp:
                for label in labels:
                    fp.write("{}\n".format(label))

        pbar = tqdm.tqdm(total=pages.items_count)
        for page in pages:
            for item in page:
                i_item += 1
                # create input annotations json
                in_filepath = os.path.join(local_annotations_path,
                                           item.filename[1:])
                name, ext = os.path.splitext(in_filepath)
                in_filepath = name + '.json'

                save_to = os.path.dirname(
                    in_filepath.replace(local_annotations_path,
                                        output_annotations_path))

                if not os.path.isdir(save_to):
                    os.makedirs(save_to, exist_ok=True)

                converter = utilities.Converter()
                converter.dataset = self.dataset
                converter.save_to_format = self.save_to_format
                converter.xml_template_path = self.xml_template_path

                if annotation_filter is None:
                    method = converter.convert_file
                else:
                    method = converter.__save_filtered_annotations_and_convert

                pool.apply_async(func=method,
                                 kwds={
                                     "to_format": to_format,
                                     "from_format": 'dataloop',
                                     "file_path": in_filepath,
                                     "save_locally": True,
                                     "save_to": save_to,
                                     'conversion_func': conversion_func,
                                     'item': item,
                                     'pbar': pbar,
                                     'filters': annotation_filter
                                 })
        pool.close()
        pool.join()
        pbar.close()
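The per-item fan-out above boils down to apply_async with keyword arguments plus a shared tqdm bar; a stripped-down sketch of that pattern (hypothetical convert_one worker and file list, not the Dataloop SDK) might look like this:

import tqdm
from multiprocessing.pool import ThreadPool


def convert_one(file_path, save_to, pbar):
    # placeholder for the real per-item conversion
    pbar.update()


items = ['a.json', 'b.json', 'c.json']          # hypothetical work list
pbar = tqdm.tqdm(total=len(items))
pool = ThreadPool(processes=6)
for file_path in items:
    pool.apply_async(func=convert_one,
                     kwds={'file_path': file_path,
                           'save_to': '/tmp/out',
                           'pbar': pbar})
pool.close()
pool.join()
pbar.close()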
Exemple #58
0
def cli():
    global options
    global output_list
    output_list = []
    ips = []

    (options, args) = get_parsed_args()

    # Generate ips from file
    if options.input_file is not None:
        try:
            with open(options.input_file, 'r') as f:
                lines = f.read().splitlines()
        except Exception as ex:
            print(ex)
            sys.exit(1)
        for item in lines:
            if '/' not in item:
                print(
                    '\nerror: %s does not appear to be in CIDR format' % item
                )
                sys.exit(1)
            try:
                network = ipaddr.IPv4Network(item)
            except (ipaddr.AddressValueError, ipaddr.NetmaskValueError):
                print('\nerror: %s is not a valid network' % item)
                sys.exit(1)
            network_ips = [str(ip) for ip in network.iterhosts()]
            ips += network_ips
    # Get network from command line
    else:
        if '/' not in args[0]:
            print('\nerror: %s does not appear to be in CIDR format' % args[0])
            sys.exit(1)
        try:
            network = ipaddr.IPv4Network(args[0])
        except (ipaddr.AddressValueError, ipaddr.NetmaskValueError):
            print('\nerror: %s is not a valid network' % args[0])
            sys.exit(1)
        ips = [str(ip) for ip in network.iterhosts()]

    print('Scanning %d hosts...\n' % len(ips))

    # Create thread pool of workers
    pool = ThreadPool(processes=options.workers)
    try:
        # .get(2592000) sets the pool timeout to one month.
        # This is a 'fix' to successfully catch a keyboard interrupt.
        pool.map_async(work_work, ips).get(2592000)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('Aborting.')
        sys.exit(1)

    # Save ips to file if needed
    if options.output_file is not None:
        try:
            with open(options.output_file, 'w') as f:
                # Uses inet_aton to convert IP to binary format
                # So the sort works as expected
                output_list.sort(key=lambda ip: inet_aton(ip))
                for ip in output_list:
                    f.write('%s\n' % ip)
        except Exception as ex:
            print(ex)
            sys.exit(1)

    print('\nFinished: %d hosts scanned' % len(ips))
    if options.reverse:
        print('Not responding hosts: %d' % len(output_list))
    else:
        print('Alive hosts: %d' % len(output_list))

    if options.output_file is not None:
        print("\nIPs list saved to '%s'" % options.output_file)
Exemple #59
0
    def __convert_dataset_to_coco(self,
                                  dataset: entities.Dataset,
                                  local_path,
                                  filters=None,
                                  annotation_filter=None):
        pages = dataset.items.list(filters=filters)
        dataset.download_annotations(local_path=local_path)
        path_to_dataloop_annotations_dir = os.path.join(local_path, 'json')

        labels = [label.tag for label in dataset.labels]
        np_labels = np.array(labels)
        class_list = np.unique(np_labels)

        label_to_id = {
            name: i
            for i, name in enumerate(class_list)
            if name not in ["done", 'completed', 'approved']
        }
        categories = [{
            'id': i,
            'name': name
        } for name, i in label_to_id.items()]

        images = [None for _ in range(pages.items_count)]
        converted_annotations = [None for _ in range(pages.items_count)]
        item_id_counter = 0
        pool = ThreadPool(processes=11)
        pbar = tqdm.tqdm(total=pages.items_count)
        for page in pages:
            for item in page:
                pool.apply_async(func=self.__single_item_to_coco,
                                 kwds={
                                     'item': item,
                                     'images': images,
                                     'path_to_dataloop_annotations_dir':
                                     path_to_dataloop_annotations_dir,
                                     'item_id': item_id_counter,
                                     'converted_annotations':
                                     converted_annotations,
                                     'annotation_filter': annotation_filter,
                                     'label_to_id': label_to_id,
                                     'pbar': pbar
                                 })
                item_id_counter += 1

        pool.close()
        pool.join()
        pbar.close()

        total_converted_annotations = list()
        for ls in converted_annotations:
            if ls is not None:
                total_converted_annotations += ls

        coco_json = {
            'images': [image for image in images if image is not None],
            'annotations': total_converted_annotations,
            'categories': categories
        }

        with open(os.path.join(local_path, 'coco.json'), 'w+') as f:
            json.dump(coco_json, f)

        return coco_json
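Rather than collecting return values, the method above pre-allocates images and converted_annotations and has each worker write into its own slot by index, keeping results aligned with item ids and leaving None where an item produced nothing. A minimal sketch of that slot-per-worker pattern (hypothetical process worker):

from multiprocessing.pool import ThreadPool

n_items = 5
results = [None] * n_items


def process(idx, results):
    # each worker writes only to its own slot, so no lock is needed
    results[idx] = idx * idx


pool = ThreadPool(processes=3)
for i in range(n_items):
    pool.apply_async(func=process, kwds={'idx': i, 'results': results})
pool.close()
pool.join()

print([r for r in results if r is not None])    # [0, 1, 4, 9, 16]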
Exemple #60
0
class LocalDaskExecutor(Executor):
    """
    An executor that runs all functions locally using `dask` and a configurable
    dask scheduler.

    Args:
        - scheduler (str): The local dask scheduler to use; common options are
            "threads", "processes", and "synchronous".  Defaults to "threads".
        - **kwargs (Any): Additional keyword arguments to pass to dask config
    """
    def __init__(self, scheduler: str = "threads", **kwargs: Any):
        self.scheduler = self._normalize_scheduler(scheduler)
        self.dask_config = kwargs
        self._pool = None  # type: Optional[multiprocessing.pool.Pool]
        super().__init__()

    @staticmethod
    def _normalize_scheduler(scheduler: str) -> str:
        scheduler = scheduler.lower()
        if scheduler in ("threads", "threading"):
            return "threads"
        elif scheduler in ("processes", "multiprocessing"):
            return "processes"
        elif scheduler in ("sync", "synchronous", "single-threaded"):
            return "synchronous"
        else:
            raise ValueError(f"Unknown scheduler {scheduler!r}")

    def __getstate__(self) -> dict:
        state = self.__dict__.copy()
        state["_pool"] = None
        return state

    def __setstate__(self, state: dict) -> None:
        self.__dict__.update(state)

    def _interrupt_pool(self) -> None:
        """Interrupt all tasks in the backing `pool`, if any."""
        if self.scheduler == "threads" and self._pool is not None:
            # `ThreadPool.terminate()` doesn't stop running tasks, only
            # prevents new tasks from running. In CPython we can attempt to
            # raise an exception in all threads. This exception will be raised
            # the next time the task does something with the Python api.
            # However, if the task is currently blocked in a c extension, it
            # will not immediately be interrupted. There isn't a good way
            # around this unfortunately.
            import platform

            if platform.python_implementation() != "CPython":
                self.logger.warning(
                    "Interrupting a running threadpool is only supported in CPython, "
                    "all currently running tasks will continue to completion")
                return

            self.logger.info(
                "Attempting to interrupt and cancel all running tasks...")

            import sys
            import ctypes

            # signature of this method changed in python 3.7
            if sys.version_info >= (3, 7):
                id_type = ctypes.c_ulong
            else:
                id_type = ctypes.c_long

            for t in self._pool._pool:  # type: ignore
                ctypes.pythonapi.PyThreadState_SetAsyncExc(
                    id_type(t.ident), ctypes.py_object(KeyboardInterrupt))

    @contextmanager
    def start(self) -> Iterator:
        """Context manager for initializing execution."""
        # import dask here to reduce prefect import times
        import dask.config
        from dask.callbacks import Callback
        from dask.system import CPU_COUNT

        class PrefectCallback(Callback):
            def __init__(self):  # type: ignore
                self.cache = {}

            def _start(self, dsk):  # type: ignore
                overlap = set(dsk) & set(self.cache)
                for key in overlap:
                    dsk[key] = self.cache[key]

            def _posttask(self, key, value, dsk, state, id):  # type: ignore
                self.cache[key] = value

        with PrefectCallback(), dask.config.set(**self.dask_config):
            if self.scheduler == "synchronous":
                self._pool = None
            else:
                num_workers = dask.config.get("num_workers", None) or CPU_COUNT
                if self.scheduler == "threads":
                    from multiprocessing.pool import ThreadPool

                    self._pool = ThreadPool(num_workers)
                else:
                    from dask.multiprocessing import get_context

                    context = get_context()
                    self._pool = context.Pool(num_workers)
            try:
                exiting_early = False
                yield
            except BaseException:
                exiting_early = True
                raise
            finally:
                if self._pool is not None:
                    self._pool.terminate()
                    if exiting_early:
                        self._interrupt_pool()
                    self._pool.join()
                    self._pool = None

    def submit(self,
               fn: Callable,
               *args: Any,
               extra_context: dict = None,
               **kwargs: Any) -> "dask.delayed":
        """
        Submit a function to the executor for execution. Returns a `dask.delayed` object.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - extra_context (dict, optional): an optional dictionary with extra
                information about the submitted task
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - dask.delayed: a `dask.delayed` object that represents the
                computation of `fn(*args, **kwargs)`
        """
        # import dask here to reduce prefect import times
        import dask

        extra_kwargs = {}
        key = _make_task_key(**(extra_context or {}))
        if key is not None:
            extra_kwargs["dask_key_name"] = key
        return dask.delayed(fn, pure=False)(*args, **kwargs, **extra_kwargs)

    def wait(self, futures: Any) -> Any:
        """
        Resolves a (potentially nested) collection of `dask.delayed` object to
        its values. Blocks until the computation is complete.

        Args:
            - futures (Any): iterable of `dask.delayed` objects to compute

        Returns:
            - Any: an iterable of resolved futures
        """
        # import dask here to reduce prefect import times
        import dask

        return dask.compute(futures, scheduler=self.scheduler,
                            pool=self._pool)[0]
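A minimal usage sketch, assuming dask is installed and the surrounding Prefect module provides the Executor base class and the _make_task_key helper used above:

def square(x):
    return x * x


executor = LocalDaskExecutor(scheduler="threads")
with executor.start():
    futures = [executor.submit(square, i) for i in range(5)]
    print(executor.wait(futures))    # expected: [0, 1, 4, 9, 16]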