Example #1
def CommandPull():
    LogSuccess("\nInitiating PBGet pull command...", False)
    print("\n*************************\n")

    # Do not execute if Unreal Editor is running
    if PBTools.CheckRunningProcess("UE4Editor.exe"):
        LogError(
            "Unreal Editor is running. Please close it before running pull command"
        )
        sys.exit(1)

    # Parse packages xml file
    config_xml = ET.parse(config_name)

    fmt = '{:<28} {:<37} {:<10}'
    print(fmt.format("  ~Package Name~", "~Version~", "~Result~"))
    packages = IgnoreExistingInstallations(config_xml.getroot())

    # Async process packages
    pool = ThreadPool(cpu_count())
    pool.map_async(ProcessPackage,
                   [package for package in packages.findall("package")])

    # Release threads
    pool.close()
    pool.join()
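
Note that map_async returns an AsyncResult; when the handle is discarded as above, exceptions raised inside the worker are stored on the result and never re-raised. A minimal sketch of the same fan-out, with a hypothetical worker, that keeps the handle and calls get() so failures surface:

from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool

def process_package(name):          # hypothetical worker for illustration
    return name.upper()

pool = ThreadPool(cpu_count())
result = pool.map_async(process_package, ["core", "editor", "tools"])
pool.close()
pool.join()
print(result.get())                 # ['CORE', 'EDITOR', 'TOOLS']; get() re-raises worker exceptions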
Example #2
    def __run_test_read_hive_inserts(self, unique_database, partitioned):
        """Check that Impala can read a single insert only ACID table (over)written by Hive
    several times. Consistency can be checked by using incremental values for
    overwrites ('run') and inserts ('i').
    """
        tbl_name = "%s.test_read_hive_inserts" % unique_database
        part_expr = "partitioned by (p int)" if partitioned else ""

        CREATE_SQL = """create table %s (run int, i int) %s TBLPROPERTIES (
         'transactional_properties' = 'insert_only', 'transactional' = 'true')
         """ % (tbl_name, part_expr)
        self.client.execute(CREATE_SQL)

        def do_role(role):
            try:
                if role == "hive":
                    self.__hive_role_write_hive_inserts(tbl_name, partitioned)
                else:
                    self.__impala_role_read_hive_inserts(tbl_name)
            except Exception:
                traceback.print_exc()
                raise

        # TODO: CTRL+C can't interrupt the test
        pool = ThreadPool(processes=2)
        pool.map_async(do_role, ["impala", "hive"]).get(600)
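
The .get(600) above bounds the wait; AsyncResult.get raises multiprocessing.TimeoutError when the workers do not finish within the timeout. A minimal standalone sketch of that behaviour, with a hypothetical worker:

import multiprocessing
import time
from multiprocessing.pool import ThreadPool

def slow(x):                        # hypothetical worker for illustration
    time.sleep(2)
    return x

pool = ThreadPool(2)
try:
    pool.map_async(slow, range(4)).get(timeout=1)
except multiprocessing.TimeoutError:
    print("workers did not finish within the timeout")
finally:
    pool.terminate()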
Example #3
def addBounties(bounties):
    """Add a list of bounties in parallel using multiprocessing.Pool for verification"""
    from multiprocessing.pool import ThreadPool
    pool = ThreadPool()
    safeprint("Mapping verifications",verbosity=3)
    async_result = pool.map_async(verify,bounties)  #defer this for possible efficiency boost
    internal = pool.map(internalVerify,bounties)
    safeprint("Waiting for verifications",verbosity=3)
    external = async_result.get()
    safeprint("Received verifications",verbosity=3)
    rvals = []
    safeprint(internal)
    safeprint(external)
    for i in range(len(bounties)):
        safeprint("Finishing the processing of bounty " + str(i+1) + "/" + str(len(bounties)),verbosity=2)
        if not internal[i]:
            rvals.append(-3)
        elif not external[i]:
            rvals.append(-2)
        elif bounties[i] in bountyList:
            rvals.append(-1)
        elif internal[i] == -1:
            rvals.append(0)
        else:
            rvals.append(1)
        safeprint("Passed first if",verbosity=3)
        if rvals[i] == 1:
            addValidBounty(bounties[i])
    safeprint("Verifications parsed",verbosity=3)
    return rvals
Example #4
    def follow(self, penguin_id, dx=0, dy=0):
        @self._safe
        def equip(item_name):
            setattr(self, item_name, getattr(penguin, item_name))

        dx = self._require_int("dx", dx)
        dy = self._require_int("dy", dy)
        penguin = self.get_penguin(penguin_id)
        self._info('Following "{}"...'.format(penguin.name))
        if penguin.id == self.id:
            self._error("Cannot follow self")
        if penguin.id not in self._penguins:
            if penguin.id not in self.buddies or not self.buddies[
                    penguin.id].online:
                self._error('Penguin "{}" not in room'.format(penguin.name))
            self.room = self.find_buddy(penguin.id)
        self._follow = (penguin.id, dx, dy)
        pool = ThreadPool()
        pool.map_async(equip, [
            "color", "head", "face", "neck", "body", "hand", "feet", "pin",
            "background"
        ])
        pool.apply_async(self._safe(self.walk),
                         (penguin.x + dx, penguin.y + dy))
        pool.apply_async(self._safe(self.add_buddy), (penguin_id, ))
        pool.close()
        pool.join()
Example #5
def get_rxns_kegg(reactome, threads=20):
    rxns = list()
    pool = ThreadPool(processes=threads)
    pool.map_async(get_kegg_rxn_kegg, reactome, callback=rxns.append)
    pool.close()
    pool.join()
    return [j for i in rxns for j in i]
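
Note that the callback given to map_async is invoked once with the complete list of results, so rxns above ends up as a list containing one list, hence the flattening on return. A small standalone sketch of that callback behaviour, with a hypothetical worker:

from multiprocessing.pool import ThreadPool

collected = []

def square(x):                      # hypothetical worker for illustration
    return x * x

pool = ThreadPool(4)
pool.map_async(square, [1, 2, 3], callback=collected.append)
pool.close()
pool.join()
print(collected)                    # [[1, 4, 9]] -- the callback ran once with the full result list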
Example #6
 def got_password_entries(self):
     if GlobalState.options.no_password_policies:
         self.controller.show_panel(views.ChoosePasswordsPanel)
         return
     def check_password_update_endpoint(login):
         print time.time(), threading.current_thread()
         if not login.get('domain'):
             return None
         print "checking", login['domain']
         scheme = login['scheme'] if GlobalState.options.ssl_not_required else 'https'
         announce_url = "%s://%s/.well-known/password-policy" % (scheme, login['domain'])
         try:
             result = requests.get(announce_url, verify=True, allow_redirects=False, timeout=5)
         except Exception as e:
             print e
             return
         if result.status_code != 200:
             return
         try:
             data = yaml.load(result.content)
         except Exception as e:
             print e
             return
         if not type(data)==dict or not data.get('endpoint') or not data['endpoint'].startswith('/'):
             return
         login['rule'] = PasswordEndpointRule(login['domain'], announce_url, data)
         print "got", login['domain'], data
     def check_complete(results):
         print "DONE"
         wx.CallAfter(self.controller.show_panel, views.ChoosePasswordsPanel)
     pool = ThreadPool(processes=50)
     pool.map_async(check_password_update_endpoint, GlobalState.logins, callback=check_complete)
     pub.sendMessage('wait')
Example #7
def main():
    # `processes` sets the worker count for this pool;
    # the default is the CPU count.
    # TODO: How about the threads' stdout/stderr? What will happen? Why?
    pool = ThreadPool(processes=3)

    # like built-in map, worker will yield the iterator.
    # TODO: chunksize?
    # TODO: callback?
    # pool.map(worker, [("test_worker_{}".format(x), ) for x in range(100)])

    # apply_async task
    # name = "test_worker"
    # pool.apply_async(worker, args=(name, ), kwds={}, callback=None)

    # map async
    pool.map_async(worker,
                   [("test_worker_{}".format(x), ) for x in range(100)])

    # TODO: what happened?
    # _worker_handler\_task_handler\_result_handler?
    # the pool class?
    # the _multiprocessing C lib?
    pool.close()
    pool.join()
Example #8
def createthreadparser(thread_count, files):
    pool = ThreadPool(int(thread_count))
    pool.map_async(getinn, files)
    pool.close()
    pool.join()

    return None
Example #9
def command_clean():
    log_success("\nInitiating PBGet clean command...", False)
    print("\n*************************\n")

    # Do not execute if Unreal Editor is running
    if PBTools.check_running_process("UE4Editor.exe"):
        log_error(
            "Unreal Editor is running. Please close it before running pull command"
        )
        sys.exit(1)

    # Parse packages xml file
    config_xml = ET.parse(config_name)
    packages = config_xml.getroot()

    if no_threading:
        for package in packages.findall("package"):
            clean_package(package)
    else:
        pool = ThreadPool(cpu_count())

        # Async process packages
        pool.map_async(clean_package,
                       [package for package in packages.findall("package")])

        # Release threads
        pool.close()
        pool.join()
Example #10
def command_pull():
    log_success("\nInitiating PBGet pull command...", False)
    print("\n*************************\n")

    # Do not execute if Unreal Editor is running
    if PBTools.check_running_process("UE4Editor.exe"):
        log_error(
            "Unreal Editor is running. Please close it before running pull command"
        )
        sys.exit(1)

    # Parse packages xml file
    config_xml = ET.parse(config_name)

    fmt = '{:<28} {:<37} {:<10}'
    print(fmt.format("  ~Package Name~", "~Version~", "~Result~"))
    packages = ignore_existing_installations(config_xml.getroot())

    if no_threading:
        for package in packages.findall("package"):
            process_package(package)
    else:
        # Async process packages
        pool = ThreadPool(cpu_count())
        pool.map_async(process_package,
                       [package for package in packages.findall("package")])

        # Release threads
        pool.close()
        pool.join()
Example #11
    def __init__(self, filename, pw, th):
        self.id_list = [
            i.strip() for i in open(filename).readlines()
            if i.strip() != '' and i
        ]
        if arg.random:
            random.shuffle(self.id_list)
        if arg.reverse:
            self.id_list = self.id_list[::-1]
        if arg.number:
            self.id_list = self.id_list[:arg.number]
        self.pw = pw
        # <-- data result -->
        self.data = {'succeeded': [], 'checkpoint': [], 'failed': []}
        self.t = 0
        self.raw = 0

        self.start = time.time()
        p = ThreadPool(int(th))
        try:
            p.map_async(self.run, self.id_list).get(9999)
        except KeyboardInterrupt:
            p.close()
        except Exception as e:
            p.terminate()
        self.print_data()
        p.close()
Example #12
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])
    if pabot_args.get("vectors"):
        result = pool.map_async(execute_and_wait_with,
                    [(datasources,
                     outs_dir,
                     options,
                     suite,
                     pabot_args['command'],
                     pabot_args['verbose'],
                     vector)
                    for suite in suite_names
                    for vector in pabot_args['vectors']])
    else:
        result = pool.map_async(execute_and_wait_with,
                    [(datasources,
                     outs_dir,
                     options,
                     suite,
                     pabot_args['command'],
                     pabot_args['verbose'],
                     None)
                    for suite in suite_names])
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
Example #13
def get_list_of_cities_async(city_href_list: list, processes_count=-1) -> list:
    def split_list(l: list, n) -> list:
        return [l[i:i + n] for i in range(0, len(l), n)]

    if processes_count == -1:
        processes_count = len(city_href_list)

    try:
        list_splitted = split_list(city_href_list,
                                   len(city_href_list) // processes_count)
    except ZeroDivisionError:
        print("No tasks available...")
        return []
    except:
        print(
            "ERROR! Too many processes. To use maximum number of threads set 'processes_count' key-arg to: -1"
        )
        return []

    result_list = []

    def log_result(result):
        result_list.append(result)

    pool = ThreadPool(processes=processes_count)
    pool.map_async(get_city_attr_async, list_splitted, callback=log_result)

    pool.close()
    pool.join()

    return flatten(flatten(result_list))
Example #14
def addBounties(bounties):
    """Add a list of bounties in parallel using multiprocessing.Pool for verification"""
    from multiprocessing.pool import ThreadPool
    pool = ThreadPool()
    safeprint("Mapping verifications", verbosity=3)
    async_result = pool.map_async(verify, bounties)  # defer this for possible efficiency boost
    internal = pool.map(internalVerify, bounties)
    safeprint("Waiting for verifications", verbosity=3)
    external = async_result.get()
    safeprint("Received verifications", verbosity=3)
    rvals = []
    safeprint(internal)
    safeprint(external)
    for i in range(len(bounties)):
        safeprint("Finishing the processing of bounty " + str(i+1) + "/" + str(len(bounties)), verbosity=2)
        if not internal[i]:
            rvals.append(-3)
        elif not external[i]:
            rvals.append(-2)
        elif bounties[i] in bountyList:
            rvals.append(-1)
        elif internal[i] == -1:
            rvals.append(0)
        else:
            rvals.append(1)
            addValidBounty(bounties[i])
        safeprint("Passed first if", verbosity=3)
    safeprint("Verifications parsed", verbosity=3)
    return rvals
Example #15
def upload(stack, args):
    def upload_file(file):
        try:
            stack.file(file)
            log(f'Skipping: {file!r} (already exists)')
        except StackException:
            stack.upload(file)
            log(f'Uploaded: {file!r}')

    if os.path.isfile(args.file_or_directory):
        return upload_file(args.file_or_directory)

    log('Setting up directory structure..', prefix='+')

    # Set up directory structure, can't be threaded as a
    # sub directory might be created before a parent directory is created.
    for directory in directories(args.file_or_directory):
        log(f'Creating directory: {directory!r}')
        stack.mkdir(directory)

    log('Starting upload..', prefix='+')

    pool = ThreadPool(processes=args.threads)
    pool.map_async(upload_file, files(args.file_or_directory))
    pool.close()
    pool.join()
Example #16
def start_bots():
    bots = ['sound_bot', 'ambience_bot', 'music_bot']

    pool = ThreadPool(processes=len(bots))
    pool.map_async(functools.partial(run_bot), (bot for bot in bots))
    pool.close()

    return 'Success'
Example #17
def main():
    hosts = ['192.168.200.134']
    #hosts=('192.168.200.134', '192.168.200.134')
    pool = ThreadPool(processes=3)
    #pool.map(request_ngx, hosts)
    pool.map_async(request_ngx, hosts, callback=Analysis_keys)
    pool.close()
    pool.join()
Example #18
def _parallel_execute_arrange(datasources, options, outs_dir, pabot_args, suite_names, para_mode):
    if (para_mode=="single"):
        original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    suite_length=len(suite_names)
    suiteGroupNum=[]
    i=0
    for suite in suite_names:
        if("Init" in suite):
            suiteGroupNum.append(i)
        elif("Destory" in suite):
            suiteGroupNum.append(i)
        i=i+1
    suiteGroup=[]
    if(len(suiteGroupNum)==0):
        suiteGroup=suite_names
    else:
        for j in range(0,len(suiteGroupNum)):
            list_start=suite_names[suiteGroupNum[j]]
            suiteGroup.append(list_start)
            list_middle=[]
            if(j==(len(suiteGroupNum)-1) and suiteGroupNum[j]==(len(suite_names)-1)):
                break
            elif(j==(len(suiteGroupNum)-1) and suiteGroupNum[j]<(len(suite_names)-1)):
                suiteGroupNum.append(len(suite_names))
            if(suiteGroupNum[j]+1<suiteGroupNum[j+1]):
                for m in range(suiteGroupNum[j]+1,suiteGroupNum[j+1]):
                    list_middle.append(suite_names[m])
                suiteGroup.append(list_middle)
    for i in range(0,len(suiteGroup)):
        suite_names=suiteGroup[i]
        if(isinstance(suite_names,str)):
            print "Running Init or End tests"
            pool = ThreadPool(1)
            suite=suite_names
            for argfile in pabot_args['argumentfiles'] or [("", None)]:
                pollArgsList = [(datasources, outs_dir, options, suite, pabot_args['command'], pabot_args['verbose'], argfile)]
                result = pool.map_async(execute_and_wait_with, pollArgsList)
            pool.close()
            pool.join()
        else:
            print "Running Middle tests"
            pool = ThreadPool(pabot_args['processes'])
            result = pool.map_async(execute_and_wait_with,
                                    ((datasources, outs_dir, options, suite,
                                      pabot_args['command'], pabot_args['verbose'], argfile)
                                     for suite in suite_names
                                     for argfile in pabot_args['argumentfiles'] or [("", None)]))
            pool.close()
            pool.join()
    while not result.ready():
        #keyboard interrupt is executed in main thread
        #and needs this loop to get time to get executed
        try:
            time.sleep(0.3)
        except IOError:
            keyboard_interrupt()
    if (para_mode == "single"):
        signal.signal(signal.SIGINT, original_signal_handler)
Example #19
def get_discord_audio_function():
    audio_url = request.args.get('audio_url')
    audio_type = request.args.get('audio_type')

    pool = ThreadPool(processes=1)
    pool.map_async(functools.partial(check_bot_run, audio_url=audio_url), [type for type in [audio_type]])
    pool.close()

    return 'Playing music in discord!'
Example #20
def get_reactome_kegg(genome, threads=20):
    reactome = list()
    pool = ThreadPool(processes=threads)
    pool.map_async(get_kegg_rxns_from_gene_kegg,
                   genome,
                   callback=reactome.extend)
    pool.close()
    pool.join()
    return set([j for i in reactome for j in i])
Example #21
def main():
    ACCESS_TOKEN = os.getenv('MIXIA_ACCESS_TOKEN')
    if not ACCESS_TOKEN:
        raise FetchFailed("`MIXIA_ACCESS_TOKEN` not found.")

    user = account.MiXiaUser.from_access_token(ACCESS_TOKEN)
    client = user.mixia_client

    try:
        album_ids = sys.argv[1:]
    except Exception:
        raise FetchFailed("Album id not found.")

    for aid in album_ids:
        thread_pool = ThreadPool(processes=10)
        album = song.MiXiaAlbum.from_id(aid, client)
        thread_pool.map_async(lambda s: s.fetch_detail(client, consts.TRACK_HIGH_QUALITY),
                              album.songs)
        thread_pool.close()
        thread_pool.join()

        ensure_dir(str(album.album_id))
        album_logo_resp = requests.get(
            album.big_logo,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
            })
        album_logo_resp.raise_for_status()
        album_logo = album_logo_resp.content
        for s in album.songs:
            detail = s.track_detail
            filename = './{}/{}.mp3'.format(album.album_id, s.song_id)
            print filename
            subprocess.call(['wget', '-O', filename, detail.track_url])

            song_name = '{}_{}_{}'.format(detail.cd_serial, detail.track,
                                          detail.song_name.replace('/', '_'))

            if not eyed3:
                print "no eyed3, skip update ID3."
                os.rename(filename, os.path.join(album.album_id, song_name))
                continue

            song_id3 = eyed3.load(filename)
            song_id3.initTag()
            song_id3.rename(song_name)
            song_id3.tag.images.set(type_=3, img_data=album_logo, mime_type='image/jpeg')
            song_id3.tag.title = detail.song_name
            song_id3.tag.album = detail.album_name
            song_id3.tag.album_artist = detail.artist_name
            song_id3.tag.artist = detail.artist_name
            song_id3.tag.disc_num = (detail.cd_serial, album.cd_count)
            song_id3.tag.track_num = (detail.track, album.song_count)

            song_id3.tag.save()
Example #22
 def thread(self, user):
     try:
         self.t = int(raw_input("[?] Threads : "))
     except:
         print "[!] Masukan angka pada thread"
         self.thread(user)
     p = ThreadPool(self.t)
     try:
         p.map_async(self.klon, user).get(9999)
         self.result()
     except KeyboardInterrupt:
         p.close()
Example #23
    def search(self,
               query,
               num_results=10,
               prefetch_pages=True,
               num_prefetch_threads=10):
        '''Perform the Google search.

        Parameters:
            String to search.
            Minimum number of result to stop search.
            Prefetch answered pages.
            Number of threads used to prefetch the pages.
            Time between thread executions in seconds to avoid IP blocking.
        '''
        search_results = []
        pages = int(
            math.ceil(num_results / float(GoogleSearch.RESULTS_PER_PAGE)))
        total = None
        thread_pool = None
        if prefetch_pages:
            thread_pool = ThreadPool(num_prefetch_threads)
        for i in range(pages):
            start = i * GoogleSearch.RESULTS_PER_PAGE
            opener = urllib.build_opener()
            opener.addheaders = GoogleSearch.DEFAULT_HEADERS
            with closing(
                    opener.open(GoogleSearch.SEARCH_URL + "?hl=en&q=" +
                                urllib.quote(query) + ("" if start == 0 else (
                                    "&start=" + str(start))))) as response:
                soup = BeautifulSoup(response.read(), "lxml")
            if total is None:
                if sys.version_info[0] > 2:
                    totalText = soup.select(
                        GoogleSearch.TOTAL_SELECTOR)[0].children.__next__()
                else:
                    totalText = soup.select(
                        GoogleSearch.TOTAL_SELECTOR)[0].children.next()
                total = int(
                    re.sub(
                        "[', ]", "",
                        re.search("(([0-9]+[', ])*[0-9]+)",
                                  totalText).group(1)))
            selector = GoogleSearch.RESULT_SELECTOR_PAGE1 if i == 0 else GoogleSearch.RESULT_SELECTOR
            self.results = self.parse_results(soup.select(selector), i)
            # if len(search_results) + len(self.results) > num_results:
            #     del self.results[num_results - len(search_results):]
            search_results += self.results
            if prefetch_pages:
                thread_pool.map_async(SearchResult.get_text, self.results)
        if prefetch_pages:
            thread_pool.close()
            thread_pool.join()
        return SearchResponse(search_results, total)
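
A hypothetical usage of the method above, assuming the surrounding GoogleSearch class can be constructed without arguments:

searcher = GoogleSearch()   # assumption: the class can be constructed without arguments
response = searcher.search("threadpool map_async", num_results=10,
                           prefetch_pages=True, num_prefetch_threads=5)
# `response` is the SearchResponse(search_results, total) built at the end of search()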
Example #24
    def scan_remote(self):
        """
        Start scanning the remote host and return the results
        :return: None
        """
        self.log(f"Scanning remote host ({len(self.all_files)} files "
                 f"over {len(self._files_local)} {self.mode}s)..")

        pool = ThreadPool(self.__max_remote_threads)
        pool.map_async(self.request, self.all_files)
        pool.close()
        pool.join()
Example #25
    def generate_complete_href_list():
        clst = [c[COUNTRY] for c in COUNTRIES]
        pool = ThreadPool(processes=len(clst))

        result_list = []

        def get_res(res):
            result_list.extend(res)

        pool.map_async(get_all_wiki_href, clst, callback=get_res)
        pool.close()
        pool.join()
        return flatten(result_list)
Example #26
    def _run_command_threaded(self, cmd):
        """
        Runs the command `cmd` threaded. If `self.async` execute all concurrently, otherwise use a single thread.
        The single thread is needed so that the repl is not blocking.

        Parameter
        ---------
        cmd : str
            The command to execute in the qemu instance
        """
        # one thread per node for async, else 1
        pool = ThreadPool(processes=len(self.nodes) if self. async else 1)
        pool.map_async(self.run_command, zip(self.nodes, repeat(cmd)))
Example #27
def main(*xunitfile_and_result_dirs):
    tests = []
    for xunit_filename, result_dir in xunitfile_and_result_dirs:
        test_dir = os.path.dirname(os.path.abspath(xunit_filename))

        tree = ElementTree.parse(xunit_filename)
        root = tree.getroot()
        assemblies = root.findall('.//assembly')

        for filename in (node.attrib['filename'] for node in assemblies):
            tests.append((filename, test_dir, result_dir))

    threads = ThreadPool()
    threads.map_async(star_test, tests).get()
Example #29
class ImageReader():
    def __init__(self, image_names, batch_size, threads):
        self.pool = ThreadPool(processes=threads)
        self.image_names = image_names
        self.batch_size = batch_size
        self.pos = 0

    def prefetch(self):
        if self.pos >= len(self.image_names):
            return False
        else:
            batch = self.image_names[
                self.pos:min(self.pos +
                             self.batch_size, len(self.image_names))]
            self.pos += self.batch_size
            self.p = self.pool.map_async(get_image, batch)
            return True

    def get_next(self):
        if self.prefetch():
            res = self.p.get()
            res = np.float32(res)
            return res
        else:
            print('Iterator exceeded length')
            return None
Example #30
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])
    if (pabot_args.has_key("hostsfile")):
        hosts = [host.rstrip('\r\n') for host in open(pabot_args["hostsfile"])]
    else:
        hosts = None
    if pabot_args["verbose"]:
        print [(suite,host) for (suite,host) in TestsuitesHosts(suite_names, hosts)]
    result = pool.map_async(execute_and_wait_with,
               [(datasources,
                 outs_dir,
                 options,
                 suite,
                 pabot_args['command'],
                 pabot_args['verbose'],
                 host)
                for (suite,host) in TestsuitesHosts(suite_names, hosts)])
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
Example #31
 def discover(self):
     "extract subnet from given ip"
     if request.method == "POST":
         if "serverIP" in request.json:
             server_ip = request.json["serverIP"]
             if not server_ip:
                 logging.warning("No IP given, using local subnet")
                 server_ip = self.get_own_ip()
         else:
             logging.warning("No IP given, using local subnet")
             server_ip = self.get_own_ip()
     elif request.method == 'GET':
         server_ip = self.get_own_ip()
     else:
         logging.error("Unknown Method API Call")
     logging.info("Server ip: {}".format(server_ip))
     subnet = ".".join(server_ip.split(".")[0:-1])
     start_time = timeit.default_timer()
     p = ThreadPool(200)
     try:
         result = p.map_async(self.check_connection, self.get_allIP(subnet))
         result.wait(timeout=2)
         p.terminate()
     except:
         pass
     elapsed = timeit.default_timer() - start_time
     logging.info("Time elapsed: {} secs".format(round(elapsed, 2)))
     ip_list = self.ip_addr["ip"]  # store list in another variable
     self.ip_addr["ip"] = []  # Clear the list for next call
     # if len(ip_list) > 1:
     #     logging.warning(" More than one IP address received")
     return jsonify({"ip": ip_list})
Example #32
    def launch_parallel_tests(self):
        image_name = "django_parallel_tests/%s" % self.project_name
        if len(self.docker.images(name=image_name)) == 0:
            self.build_image()

        req_hash = hashlib.sha224(str(sorted(self.requirements))).hexdigest()
        try:
            last_req_hash = open(".last_requirements").read().strip()
        except:
            last_req_hash = None

        if req_hash != last_req_hash:
            self.build_image()
            with open(".last_requirements", "w") as f:
                f.write(req_hash)

        pool = ThreadPool()
        tests = [[test] for test in self.tests]
        run_tests = partial(run_tests_for_project, self.project_name)

        result = pool.map_async(run_tests, tests)
        try:
            while True:
                time.sleep(0.1)
                if result.ready():
                    print "got result", result.get()
                    return
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
Example #33
def async_request(n_request):
    pool = ThreadPool()
    result = pool.map_async(make_request,
                            [LONG_TEXT for _ in range(n_request)])
    pool.close()

    return result.get()
Example #34
    def check_online_streams(self):
        self.all_streams_offline = True
        self.set_status(' Checking online streams...')

        done_queue = queue.Queue()

        def check_stream_managed(args):
            url, queue = args
            status = self._check_stream(url)
            done_queue.put(url)
            return status

        pool = Pool(self.config.CHECK_ONLINE_THREADS)
        args = [(s['url'], done_queue) for s in self.streams]
        statuses = pool.map_async(check_stream_managed, args)
        n_streams = len(self.streams)

        while not statuses.ready():
            sleep(0.1)
            self.set_status(' Checked {0}/{1} streams...'.format(
                done_queue.qsize(), n_streams))
            self.s.refresh()

        statuses = statuses.get()
        for i, s in enumerate(self.streams):
            s['online'] = statuses[i]
            if s['online']:
                self.all_streams_offline = False

        self.refilter_streams()
        self.last_autocheck = int(time())

        pool.close()
Example #35
class ThreadMailer(object):

    def __init__(self, message, mailer):
        self.message = message
        self.mailer = mailer
        self.threadPool = ThreadPool()

    def initMessage(self, *args, **kwargs):
        message = Message(*args, **kwargs)
        return message

    def initMailer(self, host, user, pwd):
        mailer = Mailer(host)
        mailer.login(user, pwd)
        return mailer

    def sendEmail(self, receiver=None):
        if receiver:
            self.message.To = receiver
        result = self.mailer.send(self.message)
        return result

    def send(self):
        result = self.threadPool.map_async(self.sendEmail, self.message.To)
        _g = None
        try:
            _g = result.get()
        except Exception, e:
            logger.error("send mail error.")
        return _g
Example #36
    def check_online_streams(self):
        self.all_streams_offline = True
        self.set_status(' Checking online streams...')

        done_queue   = queue.Queue()

        def check_stream_managed(args):
            url, queue = args
            status = self._check_stream(url)
            done_queue.put(url)
            return status

        pool = Pool(self.config.CHECK_ONLINE_THREADS)
        args = [(s['url'], done_queue) for s in self.streams]
        statuses = pool.map_async(check_stream_managed, args)
        n_streams = len(self.streams)

        while not statuses.ready():
            sleep(0.1)
            self.set_status(' Checked {0}/{1} streams...'.format(done_queue.qsize(), n_streams))
            self.s.refresh()

        statuses = statuses.get()
        for i, s in enumerate(self.streams):
            s['online'] = statuses[i]
            if s['online']:
                self.all_streams_offline = False

        self.refilter_streams()
        self.last_autocheck = int(time())

        pool.close()
Example #37
def extract_reports_mp(city_list,
                       process_count=12,
                       path='./reports',
                       report_count=50,
                       page_count=1,
                       mapping_csv='location_mapping.csv'):
    '''
    Extracts reports for specified cities from the newspaper website.

    city_list : List of city str. A list of cities for which reports are to be extracted.
                Available city names: 'Delhi', 'Mumbai', 'Bangalore', 'Kolkata'.
    process_count : Int. Number of parallel threads on which the scraping will happen.
    path : Valid path to directory (str). Path to the folder where extracted reports will be written to disk.
    report_count : Int. Number of reports to be extracted.
    page_count : Int. Number of report pages to be scraped (each page contains ~30 reports).
    mapping_csv : Valid path to csv file (str). Path to the csv file where the report-to-location mapping will be preserved.
    '''
    p = ThreadPool(process_count)
    argument_list = list(
        zip(city_list, [path] * len(city_list),
            [report_count] * len(city_list), [page_count] * len(city_list)))
    result = p.map_async(extract_reports, argument_list)
    master_location_mapping = {}

    for mapping in result.get():
        master_location_mapping.update(mapping)

    result_df = pd.DataFrame(master_location_mapping.items(),
                             columns=['filename', 'location'])

    result_df.to_csv(mapping_csv, index=False)
    print(f''' Reports location : {path}
location mapping : {mapping_csv}''')
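
A hypothetical invocation matching the documented parameters, assuming the function is importable and the newspaper site is reachable:

extract_reports_mp(['Delhi', 'Mumbai'],
                   process_count=4,
                   path='./reports',
                   report_count=50,
                   page_count=2,
                   mapping_csv='location_mapping.csv')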
Example #38
def runSqlCmdWithTimeOut(sql,
                         user,
                         host,
                         port,
                         tmpPath,
                         database="postgres",
                         mpprcFile="",
                         needmpara=False,
                         timeout=60):
    """
    function: run sql cmd with timeout
    input  : sql, user, host, port, tmpPath, database,
             mpprcFile, needmpara, timeout
    output : str
    """
    infoList = [[
        sql, user, host, port, tmpPath, database, mpprcFile, needmpara
    ]]
    endTime = datetime.now() + timedelta(seconds=timeout)
    pool = ThreadPool(1)
    result = pool.map_async(executeSql, infoList)
    while datetime.now() < endTime:
        if (result._ready):
            pool.close()
            if (result._value[0] == "NO RESULT"):
                return ""
            elif (result._value[0].startswith("ERROR")):
                raise SQLCommandException(sql, result._value[0])
            else:
                return result._value[0]
        else:
            time.sleep(1)
    pool.close()
    raise SQLCommandException(
        sql, "Running timeout, exceed the limit %s seconds" % timeout)
Example #39
    def run(self):
        if not self.platforms:
            raise RuntimeError("No enabled platform to build on")

        thread_pool = ThreadPool(len(self.platforms))
        result = thread_pool.map_async(self.select_and_start_cluster, self.platforms)

        try:
            result.get()
        # Always clean up worker builds on any error to avoid
        # runaway worker builds (includes orchestrator build cancellation)
        except Exception:
            thread_pool.terminate()
            self.log.info('build cancelled, cancelling worker builds')
            if self.worker_builds:
                ThreadPool(len(self.worker_builds)).map(
                    lambda bi: bi.cancel_build(), self.worker_builds)
            while not result.ready():
                result.wait(1)
            raise
        else:
            thread_pool.close()
            thread_pool.join()

        fail_reasons = {
            build_info.platform: build_info.get_fail_reason()
            for build_info in self.worker_builds
            if not build_info.build or not build_info.build.is_succeeded()
        }

        if fail_reasons:
            raise PluginFailedException(json.dumps(fail_reasons))
Example #40
    def get(self):
        key = self.request.get('key')
        if key:
            next_key, result = channel.read(key)
            response = json.dumps(dict(
                next_key=next_key,
                result=result,
            ))
            self.response.write(response)
            return

        handles = self.request.get_all('handles')
        key = channel.create()
        pool = ThreadPool(len(handles))
        pool.map_async(lambda handle: get_last_tweet(key, handle), handles)
        response = dict(next_key=key)
        self.response.write(json.dumps(response))
Example #41
def index(request):
    """
    Pretty much runs a Map-Reduce job on the Title-Value Sets
    
    return_objs looks like:
    [
        {T1: [V1, V2, V3]},
        {T2: [V1, V2, V3]},
        {T3: [V1, V2, V3]},
        {T4: [V1, V2, V3]},
    ]
    """
    # Limit represents the number of times the title occurs on amazon.com's query sets. Used for filtering by weight. 
    return_objs = []
    touched_titles = []
    limit = 1000
    
    def append_title(title):
        """
        Appends the title to the array along with the value set belonging to it. 
        """
        touched_titles.append(title)
        touched_values = []
        
        matched_values = Value.objects.filter(query_title__name=title).exclude(name=None).values("name")
        
        if len(matched_values) <= limit:
            print "Returned"
            return
        
        def append_value(value):
            touched_values.append(value["name"])

        # Appends the value to the value set if it's not None. 
        map(append_value, filter(lambda x: x != "None", matched_values.values()))
        
        # Sets the value set of the title if the value set is not empty.  
        if len(touched_values) > 0:
            touched_values = list(set(touched_values))
            touched_values.append(unicode("None", "utf-8"))
            print "Added Values"
            return_objs.append({
                title: reversed(touched_values)
                })

    # Loads all values for the queried titles into RAM
    titles = Title.objects.all().exclude(name=None).prefetch_related('values').values('name').distinct()
    # Makes a unique set of title strings 
    titles = list(set([title["name"] for title in titles]))
    
    # Multithreads the queried titles to grab each value set for the title.
    pool = ThreadPool()
    res = pool.map_async(append_title, titles)
    m = res.get()
    pool.close()
    
    # Returns the object to the Django Template as a dictionary.
    return render_to_response("list.html", dict(list_titles=return_objs), context_instance=RequestContext(request))
Example #42
    def get_list(self):
        def validate_distro(distro):
            if check_url_path(distro['path']):
                return distro['name']

        n_processes = len(self.distros.keys())
        pool = ThreadPool(processes=n_processes)
        map_res = pool.map_async(validate_distro, self.distros.values())
        pool.close()
        pool.join()
        res = list(set(map_res.get()) - set([None]))
        return sorted(res)
Example #43
def _parallel_execute(items, processes):
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(processes)
    result = pool.map_async(execute_and_wait_with, items, 1)
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread
        # and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
Example #44
    def run(self):
        if not self.platforms:
            raise RuntimeError("No enabled platform to build on")
        self.set_build_image()

        thread_pool = ThreadPool(len(self.platforms))
        result = thread_pool.map_async(self.select_and_start_cluster, self.platforms)

        try:
            result.get()
        # Always clean up worker builds on any error to avoid
        # runaway worker builds (includes orchestrator build cancellation)
        except Exception:
            thread_pool.terminate()
            self.log.info('build cancelled, cancelling worker builds')
            if self.worker_builds:
                ThreadPool(len(self.worker_builds)).map(
                    lambda bi: bi.cancel_build(), self.worker_builds)
            while not result.ready():
                result.wait(1)
            raise
        else:
            thread_pool.close()
            thread_pool.join()

        annotations = {'worker-builds': {
            build_info.platform: build_info.get_annotations()
            for build_info in self.worker_builds if build_info.build
        }}

        self._apply_repositories(annotations)

        labels = self._make_labels()

        fail_reasons = {
            build_info.platform: build_info.get_fail_reason()
            for build_info in self.worker_builds
            if not build_info.build or not build_info.build.is_succeeded()
        }

        workspace = self.workflow.plugin_workspace.setdefault(self.key, {})
        workspace[WORKSPACE_KEY_UPLOAD_DIR] = self.koji_upload_dir
        workspace[WORKSPACE_KEY_BUILD_INFO] = {build_info.platform: build_info
                                               for build_info in self.worker_builds}

        if fail_reasons:
            return BuildResult(fail_reason=json.dumps(fail_reasons),
                               annotations=annotations, labels=labels)

        return BuildResult.make_remote_image_result(annotations, labels=labels)
Example #45
def execute(filename):
    def run(size):
        # for size in size_block:
        cmd = ['cjpeg', '-q', '-n {}'.format(size),
               '--no-save', '{}'.format(filename)]

        try:
            process = subprocess.Popen(cmd)
            process.wait()
        except Exception:
            pass

    runner = ThreadPool(processes=_MAX_THREADS)
    result = runner.map_async(run, size_block)
    result.wait()
Example #46
    def get_list(self):
        def validate_distro(distro):
            if check_url_path(distro['path']):
                return distro['name']

        n_processes = len(self.distros.keys())
        # Avoid problems if for some reason the files are not in the right
        # place, or were deleted, or moved or not supported in the arch
        if n_processes < 1:
            return []
        pool = ThreadPool(processes=n_processes)
        map_res = pool.map_async(validate_distro, self.distros.values())
        pool.close()
        pool.join()
        res = list(set(map_res.get()) - set([None]))
        return sorted(res)
Example #47
def main():
    print "Fetching exercise data..."
    request = requests.get("http://khanacademy.org/api/v1/exercises")
    if request.status_code != 200:
        print "Error: failed to fetch exercises"
        sys.exit(1)
    exercises = [(e["name"], e["ka_url"]) for e in request.json()]
    pool = ThreadPool()
    try:
        # see http://stackoverflow.com/a/1408476
        results = pool.map_async(process_exercise, exercises).get(99999)
    except KeyboardInterrupt:
        sys.exit(1)
    success_count = results.count(True)
    failure_count = len(results) - success_count
    print "Done (%s successes, %s failures)" % (success_count, failure_count)
Example #48
  def process_all(self):
    """
    process all desired_results in the database
    """
    self.lap_timer()  # reset timer
    q = self.query_pending_desired_results()

    if self.interface.parallel_compile:
      desired_results = []
      thread_args = []

      def compile_result(args):
        interface, data, result_id = args
        return interface.compile(data, result_id)

      for dr in q.all():
        if self.claim_desired_result(dr):
          desired_results.append(dr)
          thread_args.append((self.interface, dr.configuration.data, dr.id))
      if len(desired_results) == 0:
        return
      thread_pool = ThreadPool(len(desired_results))
      # print 'Compiling %d results' % len(thread_args)
      try:
        # Use map_async instead of map because of bug where keyboardinterrupts are ignored
        # See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
        compile_results = thread_pool.map_async(compile_result,
                                                thread_args).get(9999999)
      except Exception:
        # Need to kill other processes because only one thread receives
        # exception
        self.interface.kill_all()
        raise
      # print 'Running %d results' % len(thread_args)
      for dr, compile_result in zip(desired_results, compile_results):
        # Make sure compile was successful
        self.run_desired_result(dr, compile_result, dr.id)
        try:
          self.interface.cleanup(dr.id)
        except RuntimeError as e:
          print(e)
          # print 'Done!'
      thread_pool.close()
    else:
      for dr in q.all():
        if self.claim_desired_result(dr):
          self.run_desired_result(dr)
Example #49
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    if suite_names:
        original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
        pool = ThreadPool(pabot_args['processes'])
        if pabot_args['seed']:
            rand = random.Random( pabot_args['seed'] )
            rand.shuffle( suite_names )
        if pabot_args['load_balancing']:
            shared_resources = (None, None)
            if pabot_args['resources']:
                shared_resources = _create_queue( pabot_args['resources'] )
            suite_names_distrib = [([i], shared_resources) for i in suite_names]
        else:
            shared_resources = [ (None, None) ] * pabot_args['processes']
            if pabot_args['resources']:
                assert( pabot_args['processes'] == len(pabot_args['resources']) )
                shared_resources = [ _create_queue([e]) for e in pabot_args['resources'] ]
            suite_names_distrib = zip( _pre_compute_distrib( suite_names, pabot_args['processes'] ), shared_resources )

        if pabot_args['verbose']:
            print 'Parallel execution of suites: '
            for (suite, (resources_queue, resources_names)) in suite_names_distrib:
                print '- %s' % (str(suite) if len(suite) > 1 else suite[0]),
                if resources_names:
                    print "using resource %s" % ("from %s" % str(resources_names) if len(resources_names) > 1 else "'%s'" % resources_names[0]),
                print
        
        result = pool.map_async(execute_and_wait_with,
                   [(datasources,
                     outs_dir,
                     options,
                     suite,
                     pabot_args['command'],
                     pabot_args['verbose'],
                     resources_queue)
                    for (suite, (resources_queue, resources_names)) in suite_names_distrib])
        while not result.ready():
            # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
            try:
                time.sleep(0.1)
            except IOError:
                keyboard_interrupt()
        pool.close()
        pool.join()
        result.get() #throw exception from workers if any
        signal.signal(signal.SIGINT, original_signal_handler)
Example #50
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args['processes'])
    result = pool.map_async(execute_and_wait_with,
                            ((datasources, outs_dir, options, suite,
                              pabot_args['command'], pabot_args['verbose'], argfile)
                             for suite in suite_names
                             for argfile in pabot_args['argumentfiles'] or [("", None)]))
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread
        # and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
Example #51
def _parallel_execute(datasources, options, outs_dir, pabot_args, suite_names):
    original_signal_handler = signal.signal(signal.SIGINT, keyboard_interrupt)
    pool = ThreadPool(pabot_args["processes"])
    result = pool.map_async(
        execute_and_wait_with,
        [
            (datasources, outs_dir, options, suite, pabot_args["command"], pabot_args["verbose"])
            for suite in suite_names
        ],
    )
    pool.close()
    while not result.ready():
        # keyboard interrupt is executed in main thread and needs this loop to get time to get executed
        try:
            time.sleep(0.1)
        except IOError:
            keyboard_interrupt()
    signal.signal(signal.SIGINT, original_signal_handler)
Example #52
def _map_async(pool_size, func, args):
  """Async map (threading), handling python 2.6 edge case.

  :param pool_size: Maximum number of threads.
  :param func: Function to run.
  :param args: Iterable of arguments (one per thread).

  This is necessary since using `map` will in general prevent keyboard
  interrupts from functioning properly (see this thread for more details -
  http://stackoverflow.com/a/1408476/1062617), but `map_async` hangs in python
  2.6.

  """
  pool = ThreadPool(pool_size)
  if sys.version_info <= (2, 6):
    return pool.map(func, args)
  else:
    return pool.map_async(func, args).get(1 << 31)
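
A hypothetical use of the helper above with a trivial worker, assuming it is called from the module that defines _map_async:

def path_length(path):   # hypothetical worker for illustration
    return len(path)

sizes = _map_async(4, path_length, ["/tmp/a", "/tmp/b", "/tmp/c"])
print(sizes)              # [6, 6, 6]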
Example #53
def main():
    pattener = ["*.jpg", '*.jpeg', '*.tiff', '*.bmp', "*.png"]

    times = 10
    _pool = ThreadPool(processes=_MAX_THREADS)
    files = []
    root = argv[1] if len(argv) > 1 else '.'

    for pat in patterns:
        files += glob.glob('{}/**/{}'.format(root, pat), recursive=True)

    stdout.write('{} files found\n'.format(len(files)))

    if len(files) == 0:
        return

    for i in range(1, times):
        print('run {}/{}'.format(i, times))
        result = _pool.map_async(execute, files)
        result.wait()

    print(result)
Example #54
  def process_all(self):
    '''
    process all desired_results in the database
    '''
    self.lap_timer() #reset timer
    q = (self.session.query(DesiredResult)
         .filter_by(tuning_run = self.tuning_run,
                    state = 'REQUESTED')
         .order_by(DesiredResult.generation,
                   DesiredResult.priority.desc()))

    if self.interface.parallel_compile:
      desired_results = []
      thread_args = []
      def compile_result(args):
        interface, data, result_id = args
        return interface.compile(data, result_id)
      for dr in q.all():
        if self.claim_desired_result(dr):
          desired_results.append(dr)
          thread_args.append((self.interface, dr.configuration.data, dr.id))
      thread_pool = ThreadPool(len(desired_results))
      # print 'Compiling %d results' % len(thread_args)
      try:
        # Use map_async instead of map because of bug where keyboardinterrupts are ignored
        # See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
        compile_results = thread_pool.map_async(compile_result, thread_args).get(9999999)
      except Exception:
        # Need to kill other processes because only one thread receives exception
        self.interface.kill_all()
        raise
      # print 'Running %d results' % len(thread_args)
      for dr, compile_result in zip(desired_results, compile_results):
        # Make sure compile was successful
        self.run_desired_result(dr, compile_result, dr.id)
        try:
          self.interface.cleanup(dr.id)
        except RuntimeError, e:
          print e
Example #55
class CloudBigTableDataStore(data_store.DataStore):
  """GCP CloudBigtable based data storage system.

  Note Cloud Bigtable only supports timestamp precision in milli seconds. All
  other GRR datastores support microseconds.

  Note that currently it isn't safe to use the bigtable garbage collection to
  make data disappear out from under the system, except for the two cases we use
  by default here.  Also, exposing the full power of the bigtable garbage
  collection system via configuration is very complicated. You can have nested
  AND and OR garbage collection rules, see http://goo.gl/L6Oh9i. If we decide to
  use this more extensively in the future we'll provide a sensible default gc
  strategy and tell people to modify using the bigtable client if they want to
  change it later.
  """

  COLUMN_FAMILIES = {
      "aff4": {},
      "metadata": {
          "versions": 1
      },
      "flow": {
          "versions": 1
      },
      "index": {},
      "notify": {},
      "kw_index": {},
      "task": {},
  }

  def __init__(self):
    super(CloudBigTableDataStore, self).__init__()
    self.lock = threading.RLock()
    self.instance = None
    self.table = None
    self._CalculateAttributeStorageTypes()

  # We can deprecate this once there is something included in the library:
  # https://github.com/GoogleCloudPlatform/gcloud-python/issues/2117
  def WaitOnOperation(self, operation, max_tries=4, delay=1, backoff=2):
    tries = 0
    while tries < max_tries:
      if operation.finished():
        return operation
      delay *= backoff**tries
      time.sleep(delay)
      tries += 1

  def GetInstance(self, btclient, instance_id):
    instances, _ = btclient.list_instances()
    for instance in instances:
      if instance.instance_id == instance_id:
        return instance
    return None

  def GetTable(self, instance, table_name):
    for table in instance.list_tables():
      if table.table_id == table_name:
        return table
    return None

  def StartClient(self, project_id=None, instance_id=None):
    # Connection to bigtable is fairly expensive so we open one and re-use it.
    # https://cloud.google.com/bigtable/docs/performance
    self.btclient = bigtable.Client(project=project_id)
    self.instance = self.btclient.instance(instance_id)
    self.table = self.instance.table(
        config_lib.CONFIG["CloudBigtable.table_name"])

  def Initialize(self, project_id=None, instance_id=None):
    super(CloudBigTableDataStore, self).Initialize()
    project_id = project_id or config_lib.CONFIG["CloudBigtable.project_id"]
    if not project_id:
      raise AccessError(
          "No Google Cloud project ID specified, can't create instance.")

    instance_id = instance_id or config_lib.CONFIG["CloudBigtable.instance_id"]

    self.CreateInstanceAndTable(project_id=project_id, instance_id=instance_id)
    self.StartClient(project_id=project_id, instance_id=instance_id)
    self.pool = ThreadPool(config_lib.CONFIG["CloudBigtable.threadpool_size"])

  def CreateInstanceAndTable(self, project_id=None, instance_id=None):
    # The client must be created with admin=True because it will create a
    # table.
    btclient = bigtable.Client(project=project_id, admin=True)
    tablename = config_lib.CONFIG["CloudBigtable.table_name"]
    instance_name = config_lib.CONFIG["CloudBigtable.instance_name"]

    btinstance = self.GetInstance(btclient, instance_id)
    if not btinstance:
      logging.info("Creating cloud bigtable: %s.%s in %s", instance_id,
                   tablename, project_id)
      btinstance = btclient.instance(
          instance_id,
          display_name=instance_name,
          serve_nodes=config_lib.CONFIG["CloudBigtable.serve_nodes"],
          location=config_lib.CONFIG["CloudBigtable.instance_location"])
      operation = btinstance.create()
      self.WaitOnOperation(operation)

    table = self.GetTable(btinstance, tablename)
    if not table:
      table = btinstance.table(tablename)
      table.create()
      for family, gc_rules in self.COLUMN_FAMILIES.iteritems():
        gc_rule = None
        if gc_rules:
          age = gc_rules.get("age", None)
          if age:
            gc_rule = bigtable.column_family.MaxAgeGCRule(age)

          version_max = gc_rules.get("versions", None)
          if version_max:
            gc_rule = bigtable.column_family.MaxVersionsGCRule(version_max)

        cf = table.column_family(family, gc_rule=gc_rule)
        cf.create()

    return btinstance
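
  # Note: the loop above installs at most one GC rule per column family; if a
  # family listed both "age" and "versions", the versions rule would silently
  # win. A hedged sketch of how the nested rules mentioned in the class
  # docstring could be combined with the google-cloud-bigtable API (the 7-day
  # MaxAgeGCRule value is purely illustrative):
  #
  #   combined_rule = bigtable.column_family.GCRuleIntersection(rules=[
  #       bigtable.column_family.MaxAgeGCRule(datetime.timedelta(days=7)),
  #       bigtable.column_family.MaxVersionsGCRule(1),
  #   ])
  #   cf = table.column_family("metadata", gc_rule=combined_rule)
  #   cf.create()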

  def DeleteSubject(self, subject, sync=False, token=None):
    self.DeleteSubjects([subject], sync=sync, token=token)

  def DeleteSubjects(self, subjects, sync=False, token=None):
    self.security_manager.CheckDataStoreAccess(token, subjects, "w")

    # Currently there is no multi-row mutation support, but it exists in the
    # RPC API.
    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2411
    # So we delete all subjects at once using a threadpool
    pool_args = []
    for subject in subjects:
      row = self.table.row(utils.SmartStr(subject))
      row.delete()
      pool_args.append(((row.commit, "delete"), {}))

    if sync:
      self.pool.map(self._WrapCallWithRetry, pool_args)
    else:
      self.pool.map_async(self._WrapCallWithRetry, pool_args)

  def _CalculateAttributeStorageTypes(self):
    """Build a mapping between column names and types.

    Since BT only stores bytes, we need to record the basic types that are
    required to be stored for each column.
    """
    self._attribute_types = {}

    for attribute in aff4.Attribute.PREDICATES.values():
      self._attribute_types[attribute.predicate] = (
          attribute.attribute_type.data_store_type)

  def Encode(self, attribute, value):
    """Encode the value for the attribute."""
    required_type = self._attribute_types.get(attribute, "bytes")
    if required_type in ("integer", "unsigned_integer"):
      return structs.VarintEncode(int(value))
    elif hasattr(value, "SerializeToString"):
      return value.SerializeToString()
    else:
      # Types "string" and "bytes" are stored as strings here.
      return utils.SmartStr(value)

  def Decode(self, attribute, value):
    """Decode the value to the required type."""
    required_type = self._attribute_types.get(attribute, "bytes")
    if required_type in ("integer", "unsigned_integer"):
      return structs.VarintReader(value, 0)[0]
    elif required_type == "string":
      return utils.SmartUnicode(value)
    else:
      return value
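
  # Integer attributes therefore round-trip through varints. A standalone
  # sketch of that wire format (little-endian base-128 groups with the high
  # bit as a continuation flag), assuming structs.VarintEncode/VarintReader
  # follow the usual protobuf-style encoding; e.g. 300 encodes to "\xac\x02":
  #
  #   def varint_encode(value):
  #     out = b""
  #     while True:
  #       byte = value & 0x7f
  #       value >>= 7
  #       if value:
  #         out += chr(byte | 0x80)  # more groups follow
  #       else:
  #         return out + chr(byte)   # final group
  #
  #   def varint_decode(data):
  #     result = shift = 0
  #     for char in data:
  #       byte = ord(char)
  #       result |= (byte & 0x7f) << shift
  #       if not byte & 0x80:
  #         return result
  #       shift += 7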

  def DBSubjectLock(self, subject, lease_time=None, token=None):
    return CloudBigtableLock(self, subject, lease_time=lease_time, token=token)

  def DatetimeToMicroseconds(self, datetime_utc):
    # How much do I hate datetime? let me count the ways.
    if datetime_utc.tzinfo != pytz.utc:
      raise ValueError(
          "DatetimeToMicroseconds can only safely convert UTC datetimes")
    epoch = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=pytz.utc)  # pylint: disable=g-tzinfo-datetime
    diff = datetime_utc - epoch
    return int(diff.total_seconds() * 1e6)

  def DatetimeFromMicroseconds(self, time_usec):
    seconds = float(time_usec) / 1000000
    dt = datetime.datetime.utcfromtimestamp(seconds)
    return dt.replace(tzinfo=pytz.utc)  # pylint: disable=g-tzinfo-replace

  def GetFamilyColumn(self, attribute):
    return utils.SmartStr(attribute).split(":", 1)

  def _DeleteAllTimeStamps(self, row, attribute_list):
    """Add delete mutations to row, but don't commit."""
    delete_dict = {}
    # Group column families together so we can use delete_cells
    for attribute in attribute_list:
      family, column = self.GetFamilyColumn(attribute)
      delete_dict.setdefault(family, []).append(column)
    for family, columns in delete_dict.iteritems():
      row.delete_cells(family, columns)

  def Set(self,
          subject,
          attribute,
          value,
          timestamp=None,
          token=None,
          replace=True,
          sync=True):

    self.MultiSet(
        subject, {attribute: [value]},
        timestamp,
        token=token,
        replace=replace,
        sync=sync)

  def MultiSet(self,
               subject,
               values,
               timestamp=None,
               replace=True,
               sync=True,
               to_delete=None,
               token=None):
    self.security_manager.CheckDataStoreAccess(token, [subject], "w")
    row = self.table.row(utils.SmartStr(subject))
    if to_delete:
      self._DeleteAllTimeStamps(row, to_delete)

    for attribute, value_list in values.items():
      # Attributes must be strings
      family, column = self.GetFamilyColumn(attribute)

      if replace:
        row.delete_cell(family, column)

      for value in value_list:
        element_timestamp = timestamp
        if isinstance(value, tuple):
          try:
            value, element_timestamp = value
          except (TypeError, ValueError):
            pass

        if element_timestamp is None:
          datetime_ts = datetime.datetime.utcnow()
        else:
          datetime_ts = self.DatetimeFromMicroseconds(element_timestamp)

        # Value parameter here is bytes, so we need to encode unicode to a byte
        # string:
        # https://googlecloudplatform.github.io/google-cloud-python/stable/bigtable-row.html#google.cloud.bigtable.row.DirectRow.set_cell
        value = self.Encode(attribute, value)
        row.set_cell(family, column, value, timestamp=datetime_ts)

    if sync:
      self.CallWithRetry(row.commit, "write")
    else:
      self.pool.map_async(self._WrapCallWithRetry,
                          [((row.commit, "write"), {})])

  def DeleteAttributes(self,
                       subject,
                       attributes,
                       start=None,
                       end=None,
                       sync=True,
                       token=None):
    self.MultiDeleteAttributes(
        [subject], attributes, start=start, end=end, sync=sync, token=token)

  def MultiDeleteAttributes(self,
                            subjects,
                            attributes,
                            start=None,
                            end=None,
                            sync=True,
                            token=None):

    subjects = [utils.SmartStr(subject) for subject in subjects]
    self.security_manager.CheckDataStoreAccess(token, subjects, "w")

    if isinstance(attributes, basestring):
      raise ValueError(
          "String passed to DeleteAttributes (non string iterable expected).")

    attributes = [utils.SmartStr(x) for x in attributes]

    for subject in subjects:
      row = self.table.row(subject)
      for attribute in attributes:
        if start is None and end is None:
          self._DeleteAllTimeStamps(row, [attribute])
        else:
          family, column = self.GetFamilyColumn(attribute)
          row.delete_cell(
              family,
              column,
              time_range=self._TimestampRangeFromTuple((start, end)))

      if sync:
        self.CallWithRetry(row.commit, "delete")
      else:
        self.pool.map_async(self._WrapCallWithRetry,
                            [((row.commit, "delete"), {})])

  def _TimestampRangeFromTuple(self, ts_tuple):
    start, end = ts_tuple
    if start is not None:
      if start == 0:
        start = None
      else:
        # Convert RDFDatetime to usec
        start = float(start)
        # Bigtable can only handle ms precision:
        # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2626
        # If we give it a filter with usec values it raises RPC error with
        # "Timestamp granularity mismatch".  Truncate to ms here.
        start -= start % 1000
        start = self.DatetimeFromMicroseconds(start)

    if end is not None:
      # Convert RDFDatetime to usec
      end = float(end)
      # Some searches use a 2**64 signed int to signal "no upper limit"; the
      # API expresses that more cleanly with None.
      if end >= (2**64) / 2:
        end = None
      else:
        # Truncate to ms
        end -= end % 1000
        # GRR expects inclusive timestamps for upper and lower. TimestampRange
        # is exclusive on the end. So we add 1ms to the upper bound, which is
        # the next smallest timestamp bigtable will accept.
        # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2608
        end += 1000
        end = self.DatetimeFromMicroseconds(end)
    return row_filters.TimestampRange(start=start, end=end)
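
  # Worked example with illustrative values: the microsecond tuple
  # (1484149538123456, 1484149538987654) becomes
  #   start: 1484149538123456 - 456  -> 1484149538123000 usec
  #   end:   1484149538987654 - 654 + 1000 -> 1484149538988000 usec
  # i.e. both bounds are truncated to whole milliseconds and the end is bumped
  # by 1 ms so the end-exclusive TimestampRange still covers the last value.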

  def _TimestampToFilter(self, timestamp):
    if timestamp == data_store.DataStore.ALL_TIMESTAMPS:
      return None

    if timestamp is None or timestamp == data_store.DataStore.NEWEST_TIMESTAMP:
      # Latest value only
      return row_filters.CellsColumnLimitFilter(1)

    if isinstance(timestamp, tuple):
      return row_filters.TimestampRangeFilter(
          self._TimestampRangeFromTuple(timestamp))

    raise data_store.Error("Invalid timestamp specification: %s." % timestamp)

  def CallWithRetry(self, callback, mode, *args, **kwargs):
    """Make the bigtable RPC with retries.

    Args:
      callback: a function to call, typically a bigtable row mutation.commit
      mode: A string indicating the kind of db operation: "read", "write", or
        "delete".
      *args: args to pass to the callback
      **kwargs: keyword args to pass to the callback

    Returns:
      Callback result.

    Raises:
      AccessError: if we hit our RPC retry limit, or the RPC error isn't
      retryable.
      ValueError: if you pass an unknown operation in mode.
    """
    if mode not in set(["read", "write", "delete"]):
      raise ValueError("Mode must be 'read', 'write', 'delete'")

    retry_count = 0
    sleep_interval = config_lib.CONFIG["CloudBigtable.retry_interval"]
    while retry_count < config_lib.CONFIG["CloudBigtable.retry_max_attempts"]:

      try:
        response = callback(*args, **kwargs)
        return response
      except (face.ExpirationError, face.AbortionError) as e:
        last_error = e
        last_traceback = traceback.format_exc()
        print "Retrying %s" % last_traceback

      time.sleep(sleep_interval.seconds)
      logging.info("Retrying callback: %s", callback)
      retry_count += 1
      stats.STATS.IncrementCounter("grr_cloud_bigtable_%s_retries" % mode)
      sleep_interval *= config_lib.CONFIG["CloudBigtable.retry_multiplier"]

    stats.STATS.IncrementCounter("grr_cloud_bigtable_%s_failures" % mode)
    logging.error("Gave up on %s %s after %s retries. %s", mode, callback,
                  retry_count, last_traceback)
    raise AccessError(
        "Giving up on %s callback:%s after %s retries. Last error: %s." %
        (mode, callback, retry_count, last_error))
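
  # With hypothetical config values of CloudBigtable.retry_interval=1s,
  # retry_multiplier=2 and retry_max_attempts=5, the sleeps after failed
  # attempts would be roughly 1s, 2s, 4s, 8s and 16s before AccessError is
  # raised; the real schedule depends entirely on the running configuration.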

  def _WrapCallWithRetry(self, argstuple):
    """Workaround not being able to pass kwargs to threadpool callback."""
    callargs, kwargs = argstuple
    return self.CallWithRetry(*callargs, **kwargs)

  def _SortResultsByAttrTimestampValue(self, result_list):
    """Sort order: attribute ASC, timestamp DESC, value ASC."""
    return sorted(result_list, key=lambda (a, val, ts): (a, -ts, val))

  def _GetSubjectResults(self, result, limit):
    subject_results = []
    for attribute, cells in result.to_dict().iteritems():
      for cell in cells:
        subject_results.append((attribute, self.Decode(attribute, cell.value),
                                self.DatetimeToMicroseconds(cell.timestamp)))
        limit -= 1
        if limit <= 0:
          return subject_results, limit
    return subject_results, limit

  def MultiResolvePrefix(self,
                         subjects,
                         attribute_prefix,
                         timestamp=None,
                         limit=None,
                         token=None):
    """Get results from multiple rows matching multiple attributes.

    We could implement this using read_rows, but it is a table scan. Our current
    data model makes that slow because it is a directory hierarchy that includes
    entries for subdirectories interleaved. So if you want all the results for a
    directory you need to skip those in the scan.

    Instead we make an RPC for each subject all at once using a threadpool. We
    pay more in RPC overhead but we get to do it concurrently.

    Args:
      subjects: A list of subjects.
      attribute_prefix: The attribute prefix.

      timestamp: A range of times for consideration (In
          microseconds). Can be a constant such as ALL_TIMESTAMPS or
          NEWEST_TIMESTAMP or a tuple of ints (start, end).

      limit: The total number of result values to return.
      token: An ACL token.

    Yields:
       Tuples of the form:
       (subject, [(attribute, value string, timestamp)])

       which can be converted directly into a dict.

       Values with the same attribute (which happens when timestamp is not
       NEWEST_TIMESTAMP but ALL_TIMESTAMPS or a time range) are guaranteed
       to be ordered in decreasing timestamp order.

    Raises:
      AccessError: if anything goes wrong.
      ValueError: if we get a string instead of a list of subjects.
    """
    self.security_manager.CheckDataStoreAccess(
        token, subjects, self.GetRequiredResolveAccess(attribute_prefix))

    if isinstance(subjects, basestring):
      raise ValueError("Expected list of subjects, got string: %s" % subjects)

    if isinstance(attribute_prefix, basestring):
      attribute_prefix_list = [utils.SmartStr(attribute_prefix)]
    else:
      attribute_prefix_list = [utils.SmartStr(x) for x in attribute_prefix]

    timestamp_filter = self._TimestampToFilter(timestamp)
    filter_union = []

    for attribute_prefix in attribute_prefix_list:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        # Make it an actual regex
        column += ".*"
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filter_union.append(row_filter)

    # More than one set of prefixes, use a union, otherwise just use the
    # existing filter chain.
    if len(filter_union) > 1:
      attribute_filter = row_filters.RowFilterUnion(filters=filter_union)
    else:
      attribute_filter = filter_union[0]

    # Apply those filters to each subject as a separate RPC using a threadpool
    pool_args = []
    original_subject_map = {}
    for subject in subjects:
      # List of *args, **kwargs to pass to the RPC caller
      pool_args.append(((self.table.read_row, "read", utils.SmartStr(subject)),
                        {
                            "filter_": attribute_filter
                        }))

      # We're expected to return subjects as their original type, which can be
      # URN, unicode, or string. Keep a mapping in this dict.
      original_subject_map[utils.SmartStr(subject)] = subject

    max_results = limit or 2**64
    for result in self.pool.imap_unordered(self._WrapCallWithRetry, pool_args):
      if max_results <= 0:
        break
      if result:
        subject_results, max_results = self._GetSubjectResults(result,
                                                               max_results)
        yield original_subject_map[
            result.row_key], self._SortResultsByAttrTimestampValue(
                subject_results)
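
  # A hedged caller-side sketch (subjects, prefix and token are hypothetical):
  # because the generator yields (subject, [(attribute, value, timestamp), ...])
  # pairs, its output converts directly into a dict keyed by subject:
  #
  #   results = dict(data_store.MultiResolvePrefix(
  #       ["aff4:/C.1234567812345678", "aff4:/C.8765432187654321"],
  #       "metadata:",
  #       timestamp=data_store.DataStore.ALL_TIMESTAMPS,
  #       token=token))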

  @utils.Synchronized
  def Flush(self):
    """Wait for threadpool jobs to finish, then make a new pool."""
    self.pool.close()
    self.pool.join()
    self.pool = ThreadPool(config_lib.CONFIG["CloudBigtable.threadpool_size"])

  def Resolve(self, subject, attribute, token=None):
    """Retrieve the latest value set for a subject's attribute.

    Args:
      subject: The subject URN.
      attribute: The attribute.
      token: The security token used in this call.

    Returns:
      A (string, timestamp in microseconds) stored in the bigtable
      cell, or (None, 0).

    Raises:
      AccessError: if anything goes wrong.
    """
    subject = utils.SmartStr(subject)
    self.security_manager.CheckDataStoreAccess(
        token, [subject], self.GetRequiredResolveAccess(attribute))

    attribute = utils.SmartStr(attribute)
    family, column = self.GetFamilyColumn(attribute)

    col_filter = row_filters.ColumnRangeFilter(
        family, start_column=column, end_column=column)

    # Most recent
    latest_filter = row_filters.CellsColumnLimitFilter(1)

    row_filter = row_filters.RowFilterChain(filters=[col_filter, latest_filter])
    row_data = self.table.read_row(subject, filter_=row_filter)

    if row_data:
      for cell in row_data.cells[family][column]:
        return self.Decode(
            attribute, cell.value), self.DatetimeToMicroseconds(cell.timestamp)

    return None, 0

  def ResolveMulti(self,
                   subject,
                   attributes,
                   timestamp=None,
                   limit=None,
                   token=None):
    """Resolve multiple attributes for a subject.

    Results will be returned in arbitrary order (i.e. not ordered by attribute
    or timestamp).

    Args:
      subject: The subject to resolve.
      attributes: The attribute string or list of strings to match. Note this is
          an exact match, not a regex.
      timestamp: A range of times for consideration (In
          microseconds). Can be a constant such as ALL_TIMESTAMPS or
          NEWEST_TIMESTAMP or a tuple of ints (start, end).
      limit: The maximum total number of results we return.
      token: The security token used in this call.

    Yields:
       An unordered sequence of (attribute, value string, timestamp) tuples.

    Raises:
      AccessError: if anything goes wrong.
    """
    subject = utils.SmartStr(subject)
    self.security_manager.CheckDataStoreAccess(
        token, [subject], self.GetRequiredResolveAccess(attributes))

    if isinstance(attributes, basestring):
      attributes = [utils.SmartStr(attributes)]
    else:
      attributes = [utils.SmartStr(x) for x in attributes]

    filter_union = []
    for attribute in attributes:
      family, column = self.GetFamilyColumn(attribute)
      col_filter = row_filters.ColumnRangeFilter(
          family, start_column=column, end_column=column)
      filter_union.append(col_filter)

    # More than one attribute, use a union, otherwise just use the
    # existing filter.
    if len(filter_union) > 1:
      filter_union = row_filters.RowFilterUnion(filters=filter_union)
    else:
      filter_union = filter_union[0]

    # Essentially timestamp AND (attr1 OR attr2)
    timestamp_filter = self._TimestampToFilter(timestamp)
    if timestamp_filter:
      row_filter = row_filters.RowFilterChain(
          filters=[filter_union, timestamp_filter])
    else:
      row_filter = filter_union

    row_data = self.CallWithRetry(
        self.table.read_row, "read", subject, filter_=row_filter)

    if row_data:
      max_results = limit or 2**64
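      # Note: `family` below is whatever the last iteration of the attribute
      # loop above left behind, so this read assumes every requested attribute
      # lives in the same column family.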
      for column, cells in row_data.cells[family].iteritems():
        attribute = ":".join((family, column))
        for cell in cells:
          if max_results <= 0:
            raise StopIteration
          max_results -= 1
          yield attribute, self.Decode(
              attribute,
              cell.value), self.DatetimeToMicroseconds(cell.timestamp)

  def _GetAttributeFilterUnion(self, attributes, timestamp_filter=None):
    filters = []
    for attribute_prefix in attributes:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filters.append(row_filter)

    # More than one attribute, use a union, otherwise just use the
    # existing filter.
    if len(filters) > 1:
      filters = row_filters.RowFilterUnion(filters=filters)
    else:
      filters = filters[0]

    return filters

  def _ReOrderRowResults(self, row_data):
    subject_results = {}
    for family, column_dict in row_data.cells.iteritems():
      for column, cells in column_dict.iteritems():
        attribute = ":".join((family, column))
        subject_results[attribute] = []
        for cell in cells:
          subject_results[attribute].append(
              (self.DatetimeToMicroseconds(cell.timestamp),
               self.Decode(attribute, cell.value)))

        subject_results[attribute] = sorted(
            subject_results[attribute], key=lambda x: -x[0])
        if len(subject_results[attribute]) == 1:
          subject_results[attribute] = subject_results[attribute][0]
    return subject_results

  def ScanAttributes(self,
                     subject_prefix,
                     attributes,
                     after_urn=None,
                     max_records=None,
                     token=None,
                     relaxed_order=False):
    subject_prefix = self._CleanSubjectPrefix(subject_prefix)
    after_urn = self._CleanAfterURN(after_urn, subject_prefix)
    # Turn subject prefix into an actual regex
    subject_prefix += ".*"
    self.security_manager.CheckDataStoreAccess(token, [subject_prefix], "rq")

    subject_filter = row_filters.RowKeyRegexFilter(
        utils.SmartStr(subject_prefix))
    latest_value = row_filters.CellsColumnLimitFilter(1)
    attribute_filters = self._GetAttributeFilterUnion(attributes)
    # Subject AND (attr1 OR attr2) AND latest_value
    query_filter = row_filters.RowFilterChain(
        [subject_filter, attribute_filters, latest_value])

    # The API results include the start row; we want to exclude it, so append
    # a null byte to do so.
    if after_urn is not None:
      after_urn += "\x00"

    rows_data = self.CallWithRetry(
        self.table.read_rows,
        "read",
        start_key=after_urn,
        limit=max_records,
        filter_=query_filter)

    # Ideally we should be able to stream and yield, but it seems we can't:
    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1812
    self.CallWithRetry(rows_data.consume_all, "read")

    results = []
    if rows_data.rows:
      for subject, row_data in rows_data.rows.iteritems():
        subject_results = self._ReOrderRowResults(row_data)
        results.append((subject, subject_results))
    return sorted(results, key=lambda x: x[0])
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool

from pyprimes import isprime_division as isprime

LIMIT = 1000000
CONCURRENCY = cpu_count()

def check_prime(num):
    return isprime(num), num


class benchmark(object):
    from timeit import default_timer as timer
    def __init__(self, name):
        self.name = name
    def __enter__(self):
        self.start = self.timer()
    def __exit__(self, ty, val, tb):
        end = self.timer()
        print("%s : %0.3f seconds" % (self.name, end-self.start))
        return False

pool = ThreadPool(CONCURRENCY)
print("Starting...")

with benchmark("multithreaded primality test"):
    results = pool.map_async(check_prime, xrange(LIMIT))
    results.get()

print("{0} prime(s) detected.".format(sum(1 for res in results.get() if res[0])))
Exemple #57
0
# Assumed import: the docstring and the "threads" argument suggest this Pool
# is a multiprocessing.pool.ThreadPool rather than a process pool.
from multiprocessing.pool import ThreadPool as Pool


def run_parallel(func, args, threads, callback):
    """Run func over args in multiple threads and pass the results to callback."""
    pool = Pool(processes=threads)
    pool.map_async(func, args, callback=callback)
    pool.close()
    pool.join()
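
# A minimal usage sketch (the worker and callback names are hypothetical):
# square ten numbers on four workers and print the collected results once the
# whole batch has been processed.
def square(n):
    return n * n

def print_results(results):
    print(results)

run_parallel(square, range(10), 4, print_results)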
Exemple #58
0
        ok = verify_file_cmd(args, file, cmd)
        status = 'OK  ' if ok else 'FAIL'
        report_write(args, '%s %s\n' % (status, file))
        if not ok:
            time.sleep(0.5) # to break it with Ctrl+C
elif args.mode in ('write', 'append'):
    if os.path.exists(args.out):
        backup_name = args.out + '.orig'
        os.system('cp %s %s' %\
                (escape_file(args.out), escape_file(backup_name)))
    if args.out == '-':
        args.o = sys.stdout
    elif args.mode == 'write':
        args.o = open(args.out, 'w')
    elif args.mode == 'append':
        args.o = open(args.out, 'a')
    base_dir = args.dir
    files = list_files(base_dir)
    if args.mode in ('write', 'append') \
            and args.reuse in ('yes', 'verify'):
        files.sort(key=lambda file: 0 if file in file2cmd else 1)
    if append:
        o_write(args, "# PlowBackup begin\n")
    pool = ThreadPool(args.workers)
    async_result = pool.map_async(do_upload, files)
    while not async_result.ready():
        time.sleep(1) # to break it with Ctrl+C
    if append:
        o_write(args, "# PlowBackup end\n")

Exemple #59
0
from multiprocessing.pool import ThreadPool
import random
from itertools import count

import time

def process(index):
    print "trying"
    time.sleep(random.randint(1,10))
    val = 'Done {}'.format(index)
    return val

def callback(x):
    print x


pool = ThreadPool(10)

pool.map_async(process, xrange(10), 3, callback=callback)


print 'Done Done'
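
# Note: because map_async() returns immediately, 'Done Done' prints before any
# worker finishes; without pool.close()/pool.join() (or a blocking .get() on
# the AsyncResult) the script can exit before every callback has run.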


Exemple #60
0
class Impala(Service):
  """This class represents an Impala service running on a cluster. The class is intended
     to help with basic tasks such as connecting to an impalad or checking if queries
     are running.
  """

  def __init__(self, cluster, impalads):
    Service.__init__(self, cluster)
    self.impalads = impalads
    for i in impalads:
      i.impala = self

    self._thread_pool = ThreadPool()

  @property
  def warehouse_dir(self):
    return self.cluster.hive.warehouse_dir

  def connect(self, db_name=None, impalad=None):
    if not impalad:
      impalad = choice(self.impalads)
    conn = ImpalaConnection(
        host_name=impalad.host_name,
        port=impalad.hs2_port,
        user_name=self.cluster.hadoop_user_name,
        db_name=db_name,
        use_kerberos=self.cluster.use_kerberos,
        use_ssl=self.cluster.use_ssl,
        ca_cert=self.cluster.ca_cert,
    )
    conn.cluster = self.cluster
    return conn

  @contextmanager
  def cursor(self, db_name=None, impalad=None):
    with self.connect(db_name=db_name, impalad=impalad) as conn:
      with conn.cursor() as cur:
        yield cur

  def find_stopped_impalads(self):
    stopped = list()
    for idx, pid in enumerate(self.for_each_impalad(lambda i: i.find_pid())):
      if not pid:
        stopped.append(self.impalads[idx])
    return stopped

  def find_and_set_path_to_running_impalad_binary(self):
    self.for_each_impalad(lambda i: i.find_and_set_path_to_running_binary())

  def cancel_queries(self):
    self.for_each_impalad(lambda i: i.cancel_queries())

  def get_version_info(self):
    return self.for_each_impalad(lambda i: i.get_version_info(), as_dict=True)

  def queries_are_running(self):
    return any(self.for_each_impalad(lambda i: i.queries_are_running()))

  def find_impalad_mem_mb_limit(self):
    return self.for_each_impalad(lambda i: i.find_process_mem_mb_limit())

  def find_impalad_mem_mb_reported_usage(self):
    return self.for_each_impalad(
        lambda i: i.find_reported_mem_mb_usage())

  def find_impalad_mem_mb_actual_usage(self):
    return self.for_each_impalad(lambda i: i.find_actual_mem_mb_usage())

  def find_crashed_impalads(self, start_time):
    """If any impalads are found not running, they will assumed to have crashed. A crash
       info message will be return for each stopped impalad. The return value is a dict
       keyed by impalad. See Impalad.find_last_crash_message() for info about the returned
       messages. 'start_time' is used to filter log messages and core dumps, it should
       be set to the time when the Impala service was started. Impalads that have
       non-generic crash info will be sorted last in the returned dict.
    """
    stopped_impalads = self.find_stopped_impalads()
    if not stopped_impalads:
      return dict.fromkeys(stopped_impalads)
    messages = OrderedDict()
    impalads_with_message = dict()
    for i, message in izip(stopped_impalads, self.for_each_impalad(
        lambda i: i.find_last_crash_message(start_time), impalads=stopped_impalads)):
      if message:
        impalads_with_message[i] = "%s crashed:\n%s" % (i.host_name, message)
      else:
        messages[i] = "%s crashed but no info could be found" % i.host_name
    messages.update(impalads_with_message)
    return messages

  def for_each_impalad(self, func, impalads=None, as_dict=False):
    if impalads is None:
      impalads = self.impalads
    promise = self._thread_pool.map_async(func, impalads)
    # Python doesn't handle ctrl-c well unless a timeout is provided.
    results = promise.get(maxint)
    if as_dict:
      results = dict(izip(impalads, results))
    return results
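
  # The map_async(...).get(<huge timeout>) idiom above is worth keeping for any
  # long-running ThreadPool call: on Python 2 a bare .get() with no timeout
  # cannot be interrupted with Ctrl-C, while a very large timeout keeps the
  # wait interruptible without ever realistically expiring.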

  def restart(self):
    raise NotImplementedError()