Example #1
def get_taotu_pages(category_url):
    # Find all pagination URLs under a category
    print('process category: {0}'.format(category_url))
    soup = commons.soup(category_url, encoding='utf8')
    print('process index: {0}'.format(soup.title))
    last_no = get_last_page_no(soup)
    urls = ['{0}/list_{1}.html'.format(category_url, i) for i in range(2, last_no + 1)]
    # for url in urls:
    # download_by_page(url)
    retry = 0
    while True:
        pool = ThreadPool(4)
        try:
            pool.map(download_by_page, urls)
            pool.close()
            pool.join()
            print('all images downloaded completely.')
            break
        except KeyboardInterrupt as e:
            print('download terminated by user, quit now.', e)
            pool.terminate()
            pool.join()
            break
        except Exception as e:
            pool.terminate()
            pool.join()
            retry += 1
            traceback.print_exc()
            try:
                print('download error: {0}, {1} retry in {2}s'.format(
                    e, retry, retry * 20 % 120))
            except Exception:
                pass
            time.sleep(retry * 20 % 120)
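The retry loop above is a reusable pattern: build a fresh pool for each attempt, terminate() it on failure, and back off before retrying. A stripped-down sketch of the same idea, with a hypothetical work function and item list:

import time
import traceback
from multiprocessing.dummy import Pool as ThreadPool

def run_with_retries(work, items, workers=4, max_retries=5):
    retry = 0
    while retry < max_retries:
        pool = ThreadPool(workers)
        try:
            pool.map(work, items)
            pool.close()
            pool.join()
            return True
        except KeyboardInterrupt:
            pool.terminate()   # abandon the in-flight tasks
            pool.join()
            return False
        except Exception:
            pool.terminate()
            pool.join()
            retry += 1
            traceback.print_exc()
            time.sleep(retry * 20 % 120)   # simple backoff, as in the example above
    return False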
Example #2
def dispatch(objects: list, method: str, parentId: int):
    # print("{} sleepin 5".format(parentId))
    # time.sleep(5)
    # print("{} done sleepin 5".format(parentId))
    # return parentId

    if method.lower() == "serial":

        for object in objects:
            try:
                runner(object)
            except clsDelayedException as tb:
                tb.re_raise()

    elif method.lower() == "parallel":
        # pool = ThreadPool(processes=len(objects))
        pool = ThreadPool(processes=5)
        try:
            # imap_unordered returns results as soon as they're available
            list(pool.imap_unordered(runner, objects))
        except clsDelayedException as tb:
            logging.error(
                f"Terminating thread pool of parent with id {parentId}")
            pool.close()
            pool.terminate()
            tb.re_raise()
        else:
            pool.close()
            pool.join()
            # pool.wait(timeout=10)

    else:
        raise AttributeError(
            "Only 'serial' or 'parallel' supported for dispatch method")
Example #3
    def _get_available_endpoint(self):
        """
        Private method that filters endpoints of the right size, asks those endpoints in parallel whether they are
        available, and handles what they report.
        :return: an endpoint that a scan can be run on, or raise a NoServersAvailableError
        """
        Logger.app.debug("Searching for appropriately sized servers")

        correct_sized_endpoints = self._get_endpoints_of_the_right_size()
        # correct_sized_endpoints = random.shuffle(correct_sized_endpoints)
        Logger.app.debug("correct_sized_endpoints: {}".format(correct_sized_endpoints))

        # if there are no endpoints of that size.
        if len(correct_sized_endpoints) == 0:
            Logger.app.error("No servers of that size are available.")
            raise NoServersAvailableError

        pool = ThreadPool(len(correct_sized_endpoints))

        # start some threads - first to finish adds to a queue and that is what we return.
        pool.imap_unordered(self._is_endpoint_available, correct_sized_endpoints)

        try:
            correct_sized_endpoint = self._results_queue.get(block=True, timeout=self.timeout)
        except queue.Empty as e:  # if queue is still empty after timeout period this is raised.
            Logger.app.error("The search has timed out after {} seconds.".format(self.timeout))
            raise NoServersAvailableError

        Logger.app.debug("We have an endpoint: {}".format(correct_sized_endpoint))

        # kill all running threads
        pool.terminate()

        return correct_sized_endpoint
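The first-responder pattern above can be reduced to a self-contained sketch: every candidate is probed in parallel, the first one to report success is returned, and terminate() abandons the rest. The candidate list and check function here are hypothetical stand-ins, not part of the original class.

import queue
from multiprocessing.dummy import Pool as ThreadPool

def first_available(candidates, check, timeout=10):
    if not candidates:
        return None
    results = queue.Queue()

    def probe(candidate):
        if check(candidate):          # hypothetical availability check
            results.put(candidate)

    pool = ThreadPool(len(candidates))
    # imap_unordered schedules the probes without blocking on their results
    pool.imap_unordered(probe, candidates)
    try:
        return results.get(block=True, timeout=timeout)
    except queue.Empty:
        return None
    finally:
        pool.terminate()              # kill any probes still running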
Example #4
def extract_features(extract_fn, image_path, ofile, params={}):

    cwd = os.getcwd()
    image_path = utils.absolute_path(image_path)

    # Read filenames
    files = []
    if os.path.isdir(image_path):
        for dirpath, _, filenames in os.walk(image_path):
            for f in filenames:
                path = os.path.abspath(os.path.join(dirpath, f))
                if not utils.is_valid_image(path):
                    print("Warning, please provide a valid image: ", f)
                else:
                    files.append(path)
    else:
        files = [image_path]

    files.sort(key=utils.natural_sort_key)

    output_file = utils.absolute_path(ofile)

    if os.path.isdir(output_file):
        print("The provided file is a directory:", output_file)
        sys.exit(0)

    if os.path.exists(output_file):
        os.remove(output_file)

    def extract_and_save(path):
        try:
            X = extract_fn(path, **params)
        except Exception as e:
            print("Cannot extract feactures from", path)
            print(str(e))
            return

        X = X.reshape((1, X.shape[0]))
        lock.acquire()
        with open(output_file, 'a+') as f_handle:
            with open(output_file + ".label", 'a+') as f_handle_label:
                numpy.savetxt(f_handle, X)
                f_handle_label.write(os.path.basename(path) + "\n")
        lock.release()

    pool = ThreadPool(cpu_count())
    results = pool.map(extract_and_save, files)
    pool.close()
    pool.terminate()
    pool.join()
    """
    for path in files:
        X = feaext.SRM_extract(path, **params)
        print X.shape
        X = X.reshape((1, X.shape[0]))
        with open(sys.argv[3], 'a+') as f_handle:
            numpy.savetxt(f_handle, X)
    """

    os.chdir(cwd)
Example #5
def fetch_or_load_urls(filename):
    # Fetch the full list of tugua URLs.
    # If a cached file exists, read from the cache;
    # otherwise fetch them from the web with multiple threads/processes.
    if os.path.exists(jsonfile):
        print('found url json file cache {0}'.format(filename))
        return json.load(open(jsonfile, 'r'))
    pool = ThreadPool(4)
    try:
        pool.map(find_all_tugua_urls, range(1, 51))
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('terminated by user.')
        pool.terminate()
        pool.join()

    items = [url_to_item(url) for url in sorted(urls, cmp=url_cmp)]

    if items:
        # json.dump(items, open('urls.json', 'w'), indent=2)  # writes \uxxxx escapes
        json.dump(
            items,
            codecs.open(filename, 'w', 'utf8'),  # write Chinese characters as-is
            ensure_ascii=False,
            indent=2)

        print('saved url json file cache to {0}'.format(filename))
    else:
        print('no url items found, maybe something wrong!')

    return items
Example #6
    def ComputeTimelineBasedMetrics(self):
        assert not self._current_page_run, 'Cannot compute metrics while running.'

        def _GetCpuCount():
            try:
                return multiprocessing.cpu_count()
            except NotImplementedError:
                # Some platforms can raise a NotImplementedError from cpu_count()
                logging.warn('cpu_count() not implemented.')
                return 8

        runs_and_values = self._FindRunsAndValuesWithTimelineBasedMetrics()
        if not runs_and_values:
            return

        # Note that this is speculatively halved as an attempt to fix
        # crbug.com/953365.
        threads_count = min(_GetCpuCount() // 2 or 1, len(runs_and_values))
        pool = ThreadPool(threads_count)
        try:
            for result in pool.imap_unordered(_ComputeMetricsInPool,
                                              runs_and_values):
                self._AddPageResults(result)
        finally:
            pool.terminate()
            pool.join()
Example #7
def SerializeHtmlTraces(results):
    """Creates html trace files for each story run, if necessary.

  For each story run, takes all trace files from individual trace agents
  and runs trace2html on them. This is done only once, subsequent calls to this
  function will not do anything.

  TODO(crbug.com/981349): Remove this function entirely when trace
  serialization has been handed over to results processor.
  """
    assert not results.current_story_run, 'Cannot serialize traces while running.'

    def _GetCpuCount():
        try:
            return multiprocessing.cpu_count()
        except NotImplementedError:
            # Some platforms can raise a NotImplementedError from cpu_count()
            logging.warn('cpu_count() not implemented.')
            return 8

    available_runs = list(run for run in results.IterRunsWithTraces())
    if not available_runs:
        return

    # Note that this is speculatively halved as an attempt to fix
    # crbug.com/953365.
    threads_count = min(_GetCpuCount() // 2 or 1, len(available_runs))
    pool = ThreadPool(threads_count)
    try:
        for _ in pool.imap_unordered(_SerializeHtmlTraceInPool,
                                     available_runs):
            pass
    finally:
        pool.terminate()
        pool.join()
Example #8
    def run():
        t = [
            ('users', User().create),
            ('forums', Forum().create),
            ('threads', Thread().create),
            ('posts', Post().create),
            ("followers", User().follow),
            ("subscribptions", Thread().subscribe),
        ]

        for entity, factory in t:
            entities = [True for i in range(int(settings[entity]))]
            num_tasks = len(entities)
            pool = ThreadPool(int(settings['num_threads']))
            try:
                progress = list(range(5, 105, 5))
                for i, _ in enumerate(pool.imap(factory, entities)):
                    perc = i * 100 // num_tasks
                    if perc % 5 == 0 and perc in progress:
                        log.print_out('Creating %s: %d%% done' % (entity, perc))
                        progress.remove(perc)
                pool.close()
                pool.join()
            except Exception as e:
                print(e)
                pool.terminate()
                sys.exit(1)
Example #9
def processJobs(jobs, concurrentTasks, sortOutput=False):
    job_count = len(jobs)
    logging.info("Processing {} job(s) with a concurrency of {}".format(
        job_count, concurrentTasks))

    if RANDOMIZE_JOBS: shuffle(jobs)

    pool = Pool(concurrentTasks)
    try:
        job_progress = 0
        for x in tqdm(pool.imap_unordered(worker, jobs), total=len(jobs)):
            job_progress += 1
            logging.info("{} out of {} staged jobs remaining".format(
                job_count - job_progress, job_count))
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        printAndLog(
            "\nReceived keyboard interrupt. Cleaning up and exiting...")
        pool.terminate()
        cleanup()
        sys.exit(1)
    except SystemExit:
        pool.terminate()
        sys.exit(1)
    if sortOutput: cleanup()
    print("\n")
Example #10
def tests():
    # for vm in VM_REFERENCE:
    #     vmJson = str(VM_REFERENCE[vm]).replace("\'", "\"")
    #     #VM_REFERENCE[vm]['vmName']
    #     #vmDetails = json.JSONDecoder.decode(VM_REFERENCE[vm])
    #     print(vmJson)
    #     vmDetails = json.loads(vmJson)
    #     type(vmDetails)

    #     print(f'\nTEST {vmDetails["vmName"]}')
    #     #vmDetails.get().

    # print(f'\nTEST {VM_REFERENCE["dc"]["nicPrivateIPAddress"]}')
    # pips_info = []
    # pips_info.append("item1")
    # pips_info.append("item2")
    # pips_info.append("item3")
    # print(f'\nTEST {pips_info[0]}')

    # # replace_in_dict(example_dict, variables)
    # ADMIN_PASSWORD = os.environ['ADMINPASSWORD']
    # SERVICE_ACCOUNT_PASSWORD = ADMIN_PASSWORD
    # PASSWORDS["ADMIN_PASSWORD"] = ADMIN_PASSWORD
    # PASSWORDS["SERVICE_ACCOUNT_PASSWORD"] = SERVICE_ACCOUNT_PASSWORD
    # #result = replace_in_dict(example_dict, PASSWORDS)
    # result = replace_in_dict(VM_DSC_REFERENCE, PASSWORDS)
    # print(f'\nresult after: {result}')
    
    print(f'\nStart test')
    print_time_pool = ThreadPool(1)
    print_time_pool.apply_async(print_time)
    time.sleep(2)
    print_time_pool.terminate()
Example #11
def dowload_all(by_page=False):
    # Download the images for every tag on the site
    items = range(1, 145) if by_page else get_all_tags()
    retry = 0
    while True:
        pool = ThreadPool(4)
        try:
            pool.map(download_by_page if by_page else download_by_tag, items)
            pool.close()
            pool.join()
            print('all images are downloaded completely.')
            break
        except KeyboardInterrupt as e:
            print('download terminated by user, quit now.', e)
            pool.terminate()
            pool.join()
            break
        except Exception as e:
            pool.terminate()
            pool.join()
            retry += 1
            traceback.print_exc()
            try:
                print('download error: {0}, {1} retry in {2}s'.format(
                    e, retry, retry * 20 % 120))
            except Exception:
                pass
            time.sleep(retry * 20 % 120)
Example #12
    def run():
        t = [
            ('users', User().create),
            ('forums', Forum().create),
            ('threads', Thread().create),
            ('posts', Post().create),
            ("followers", User().follow),
            ("subscribptions", Thread().subscribe),
        ]

        for entity, factory in t:
            entities = [True for i in range(int(settings[entity]))]
            num_tasks = len(entities)
            pool = ThreadPool(int(settings['num_threads']))
            try:
                progress = list(range(5, 105, 5))
                for i, _ in enumerate(pool.imap(factory, entities)):
                    perc = i * 100 // num_tasks
                    if perc % 5 == 0 and perc in progress:
                        log.print_out('Creating %s: %d%% done' %
                                      (entity, perc))
                        progress.remove(perc)
                pool.close()
                pool.join()
            except Exception as e:
                print(e)
                pool.terminate()
                sys.exit(1)
Example #13
def put_from_manifest(
        s3_bucket, s3_connection_host, s3_ssenc, s3_base_path,
        aws_access_key_id, aws_secret_access_key, manifest,
        bufsize, reduced_redundancy, rate_limit, concurrency=None, incremental_backups=False):
    """
    Uploads files listed in a manifest to Amazon S3.
    To support files larger than 5GB, multipart upload is used (chunks of 60MB).
    Files are uploaded compressed with lzop; the .lzo suffix is appended.
    """
    exit_code = 0
    bucket = get_bucket(
        s3_bucket, aws_access_key_id,
        aws_secret_access_key, s3_connection_host)
    manifest_fp = open(manifest, 'r')
    buffer_size = int(bufsize * MBFACTOR)
    files = manifest_fp.read().splitlines()
    pool = Pool(concurrency)
    for f in pool.imap(upload_file,
                       ((bucket, f, destination_path(s3_base_path, f), s3_ssenc, buffer_size, reduced_redundancy, rate_limit) for f in files if f)):
        if f is None:
            # Upload failed.
            exit_code = 1
        elif incremental_backups:
            # Delete files that were successfully uploaded.
            os.remove(f)
    pool.terminate()
    exit(exit_code)
Example #14
def put_from_manifest(
    s3_bucket,
    s3_connection_host,
    s3_ssenc,
    s3_base_path,
    aws_access_key_id,
    aws_secret_access_key,
    manifest,
    bufsize,
    concurrency=None,
    incremental_backups=False,
):
    """
    Uploads files listed in a manifest to Amazon S3.
    To support files larger than 5GB, multipart upload is used (chunks of 60MB).
    Files are uploaded compressed with lzop; the .lzo suffix is appended.
    """
    bucket = get_bucket(s3_bucket, aws_access_key_id, aws_secret_access_key, s3_connection_host)
    manifest_fp = open(manifest, "r")
    buffer_size = int(bufsize * MBFACTOR)
    files = manifest_fp.read().splitlines()
    pool = Pool(concurrency)
    for _ in pool.imap(
        upload_file, ((bucket, f, destination_path(s3_base_path, f), s3_ssenc, buffer_size) for f in files)
    ):
        pass
    pool.terminate()

    if incremental_backups:
        for f in files:
            os.remove(f)
Example #15
def main1():
    pp = sys.argv[1]
    log = get_logger(level=logging.DEBUG)
    pool = ThreadPool(10)
    q = queue.Queue()
    i = 0
    total = 0
    try:
        t = time.time()
        print(time.localtime(t))
        for root, dirs, files in os.walk(pp, True):
            for f in files:
                fpath = pjoin(root, f)
                # q.put(pool.apply_async(work, (fpath, t, log)))
                pool.apply_async(work, (fpath, t, log))
                total += 1
        log.info("total=%d, q=%d, t=%d", total, q.qsize(), time.time() - t)
        pool.close()
        pool.join()
        log.info("finish, total=%d, q=%d, t=%d", total, q.qsize(),
                 time.time() - t)
        print(time.localtime())
    except Exception as e:
        log.exception(e)
        pool.terminate()
        raise
    finally:
        pool.join()
Example #16
def put_from_manifest(s3_bucket,
                      s3_connection_host,
                      s3_ssenc,
                      s3_base_path,
                      aws_access_key_id,
                      aws_secret_access_key,
                      manifest,
                      bufsize,
                      concurrency=None,
                      incremental_backups=False):
    """
    Uploads files listed in a manifest to Amazon S3.
    To support files larger than 5GB, multipart upload is used (chunks of 60MB).
    Files are uploaded compressed with lzop; the .lzo suffix is appended.
    """
    bucket = get_bucket(s3_bucket, aws_access_key_id, aws_secret_access_key,
                        s3_connection_host)
    manifest_fp = open(manifest, 'r')
    buffer_size = int(bufsize * MBFACTOR)
    files = manifest_fp.read().splitlines()
    pool = Pool(concurrency)
    for _ in pool.imap(
            upload_file,
        ((bucket, f, destination_path(s3_base_path, f), s3_ssenc, buffer_size)
         for f in files)):
        pass
    pool.terminate()

    if incremental_backups:
        for f in files:
            os.remove(f)
Example #17
def abortable_worker(func, *args, **kwargs):
    timeout = kwargs.get("timeout", None)
    instance_id = kwargs.get("instance_id", None)
    instance_name = kwargs.get("instance_name", None)
    main_result = kwargs.get("result", None)
    timeout_results = []
    timeout_results.append(main_result)
    p = ThreadPool(1)
    timeout_result = initResult()
    res = p.apply_async(func, args=args)
    try:
        out = res.get(timeout)  #wait for function to complete
        return out
    except mp.TimeoutError as e:
        print "Timeout"
        timeout_result["id"] = instance_id
        timeout_result["name"] = instance_name
        timeout_result["action"] = "GET_STATUS"
        timeout_result["status"] = "Timeout checking status after X secs"
        timeout_results.append(timeout_result)
        #process_results(timeout_results)
        print "terminating due to timeout"
        print timeout_results
        p.terminate()
        #print "Printing timeout result ********************"
        #print timeout_results
        return timeout_results
Example #18
    def process(self, urls):
        result = {}
        urls = list(set(urls))
        # get crawled pages
        for page in self.storage.find({'_id': {'$in': urls}}):
            if page.get('crawled_date'):
                self.logger.debug('Page was crawled: ' + page['_id'])
                result[page['_id']] = page

        self.logger.info("Num of crawled urls: %s" % len(result))
        # filter crawled page
        urls = [u for u in urls if u not in result]

        self.logger.info("Remain haven't crawled urls: %s" % len(urls))

        if not urls:
            self.logger.info('All urls has been crawled')
            return result

        if len(urls) > 2:
            # use multi thread to crawl pages
            pool = Pool(cpu_count() * 2)
            self.logger.debug('Have to crawl these urls: %s' % urls)
            pool_results = pool.map(self._crawl_page, urls)
            # get results
            for r in pool_results:
                result.update(r)

            pool.close()
            pool.terminate()
        else:
            for url in urls:
                result.update(self._crawl_page(url))

        return result
Example #19
def bfs(start, depth, keyword):
    start = removeQuery(start)
    foundUrls.append(removeScheme(start))
    startNode = newNode(start)
    nodeList.append(startNode)
    queue = [startNode]
    keywordFound = False
    p = ThreadPool(10)

    #if we have reached depth then we don't need to search any more links
    #check all of the nodes at this depth before moving deeper
    while depth > 0 and queue and not keywordFound:
        results = p.map(scrape, queue)
        queue = []
        for children in list(results):
            for childNode in children:
                tempUrl = childNode['url']
                if removeScheme(tempUrl) in foundUrls:
                    children.remove(childNode)
                    continue
                if keyword and (keyword in tempUrl):
                    keywordFound = True
                    childNode['hasKeyword'] = True
                    break
                foundUrls.append(removeScheme(tempUrl))
            queue.extend(children)
            if (keywordFound): break
        depth -= 1
        print('***************** depth levels remaining = ', depth)
    p.terminate()
    p.join()
    return
Example #20
def abortable_func(func, *args, **kwargs):
	"""
	The abortable_func is the wrapper function, which wraps around function type "func", call 
	  it in a background thread (multiprocessing.dummy.Thread), and terminates it after
	  "timeout" seconds.
	This function is inspired by 
	  http://stackoverflow.com/questions/29494001/how-can-i-abort-a-task-in-a-multiprocessing-pool-after-a-timeout
	  but is an improvement over the original solution, since the original solution is only 
	  applicable to a function that takes positional arguments.

	Parameters of the function:
	  func - the function that will be called and terminated if not return with "timeout" seconds
	  *args - positional arguments of "func"
	  **kwargs - named arguments of "func" + "timeout" value
	"""
	
	#- Get "timeout" value and create a ThreadPool (multiprocessing.dummy.Pool) 
	#  with only 1 worker. 
	#- Use functools.partial (https://docs.python.org/3/library/functools.html)
	#  to fit all the arguments of the func into the interface of
	#  Pool.apply_async function
	timeout = kwargs.pop('timeout', None);
	p = ThreadPool(1);
	partial_func = partial(func,**kwargs);
	res = p.apply_async(partial_func,args);

	#- Terminate the thread if it does not return after "timeout" seconds
	#  otherwise return the returned value of func
	try:
		out = res.get(timeout);
		return out
	except TimeoutError:
		p.terminate()
		return "{}:Timeout exceeded. Process terminated.\r\n".format(args[0]);
Example #21
def main():
    n = 1000000
    m = 1
    m2 = 10000
    m3 = 100
    
    create_db()

    pool = Pool(processes=5)
    start = time.time()
    fill(n)
    fill_time = time.time() - start
    print('{} inserts in {}s'.format(n,fill_time))

    start = time.time()
    results = []
    for _ in range(m):
        results.append(pool.apply_async(read, ()))
#        results.append(pool.apply_async(read_dataset, ()))
        for i in range(m2):
            results.append(pool.apply_async(read_one, ()))
            if i%m3 == 0:
                results.append(pool.apply_async(fill, (1,)))
    for r in results:
        r.get(timeout=1000000)
    read_time = time.time() - start
    pool.terminate()

    print('{}.{} reads in {}s'.format(m,m2,read_time))
Example #22
def dowload_all(by_page=False):
    # Download the images for every tag on the site
    items = range(1, 145) if by_page else get_all_tags()
    retry = 0
    while True:
        pool = ThreadPool(4)
        try:
            pool.map(download_by_page if by_page else download_by_tag, items)
            pool.close()
            pool.join()
            print('all images are downloaded completely.')
            break
        except KeyboardInterrupt as e:
            print('download terminated by user, quit now.', e)
            pool.terminate()
            pool.join()
            break
        except Exception as e:
            pool.terminate()
            pool.join()
            retry += 1
            traceback.print_exc()
            try:
                print('download error: {0}, {1} retry in {2}s'.format(
                    e, retry, retry * 20 % 120))
            except Exception:
                pass
            time.sleep(retry * 20 % 120)
Example #23
def ComputeTimelineBasedMetrics(results):
    """Compute TBMv2 metrics on all story runs in parallel."""
    assert not results.current_story_run, 'Cannot compute metrics while running.'

    def _GetCpuCount():
        try:
            return multiprocessing.cpu_count()
        except NotImplementedError:
            # Some platforms can raise a NotImplementedError from cpu_count()
            logging.warn('cpu_count() not implemented.')
            return 8

    available_runs = list(run for run in results.IterRunsWithTraces()
                          if run.tbm_metrics)
    if not available_runs:
        return

    # Note that this is speculatively halved as an attempt to fix
    # crbug.com/953365.
    threads_count = min(_GetCpuCount() // 2 or 1, len(available_runs))
    pool = ThreadPool(threads_count)
    metrics_runner = lambda run: _ComputeMetricsInPool(run, results.label,
                                                       results.upload_bucket)

    try:
        for result in pool.imap_unordered(metrics_runner, available_runs):
            results.AddMetricPageResults(result)
    finally:
        pool.terminate()
        pool.join()
Example #24
def put_from_manifest(
        s3_bucket, s3_connection_host, s3_ssenc, s3_base_path,
        aws_access_key_id, aws_secret_access_key, manifest,
        bufsize, reduced_redundancy, rate_limit, concurrency=None, incremental_backups=False):
    """
    Uploads files listed in a manifest to Amazon S3.
    To support files larger than 5GB, multipart upload is used (chunks of 60MB).
    Files are uploaded compressed with lzop; the .lzo suffix is appended.
    """
    exit_code = 0
    bucket = get_bucket(
        s3_bucket, aws_access_key_id,
        aws_secret_access_key, s3_connection_host)
    manifest_fp = open(manifest, 'r')
    buffer_size = int(bufsize * MBFACTOR)
    files = manifest_fp.read().splitlines()
    pool = Pool(concurrency)
    for f in pool.imap(upload_file,
                       ((bucket, f, destination_path(s3_base_path, f), s3_ssenc, buffer_size, reduced_redundancy, rate_limit) for f in files if f)):
        if f is None:
            # Upload failed.
            exit_code = 1
        elif incremental_backups:
            # Delete files that were successfully uploaded.
            os.remove(f)
    pool.terminate()
    exit(exit_code)
Example #25
def main():
    n = 1000000
    m = 1
    m2 = 10000
    m3 = 100

    create_db()

    pool = Pool(processes=5)
    start = time.time()
    fill(n)
    fill_time = time.time() - start
    print('{} inserts in {}s'.format(n, fill_time))

    start = time.time()
    results = []
    for _ in range(m):
        results.append(pool.apply_async(read, ()))
        #        results.append(pool.apply_async(read_dataset, ()))
        for i in range(m2):
            results.append(pool.apply_async(read_one, ()))
            if i % m3 == 0:
                results.append(pool.apply_async(fill, (1, )))
    for r in results:
        r.get(timeout=1000000)
    read_time = time.time() - start
    pool.terminate()

    print('{}.{} reads in {}s'.format(m, m2, read_time))
Example #26
def embed_message(embed_fn, path, payload, output_dir, embed_fn_saving=False):

    path = utils.absolute_path(path)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_dir = utils.absolute_path(output_dir)

    # Read filenames
    files = []
    if os.path.isdir(path):
        for dirpath, _, filenames in os.walk(path):
            for f in filenames:
                path = os.path.abspath(os.path.join(dirpath, f))
                if not utils.is_valid_image(path):
                    print("Warning, please provide a valid image: ", f)
                else:
                    files.append(path)
    else:
        files = [path]

    # remove files already generated in a previous execution
    filtered_files = []
    for f in files:
        basename = os.path.basename(f)
        dst_path = os.path.join(output_dir, basename)
        if os.path.exists(dst_path):
            print("Warning! file already exists, ignored:", dst_path)
            continue
        filtered_files.append(f)
    files = filtered_files
    del filtered_files

    def embed(path):
        basename = os.path.basename(path)
        dst_path = os.path.join(output_dir, basename)

        if embed_fn_saving:
            embed_fn(path, payload, dst_path)
        else:
            X = embed_fn(path, payload)
            try:
                scipy.misc.toimage(X, cmin=0, cmax=255).save(dst_path)
            except Exception as e:
                print(str(e))

    # Process thread pool in batches
    batch = 1000
    for i in range(0, len(files), batch):
        files_batch = files[i:i + batch]
        n_core = cpu_count()
        print("Using", n_core, "threads")
        pool = ThreadPool(n_core)
        results = pool.map(embed, files_batch)
        pool.close()
        pool.terminate()
        pool.join()
    """
Example #27
def parallel_apply_generator(
    func, iterable, workers, max_queue_size, dummy=False, random_seeds=True
):
    """多进程或多线程地将func应用到iterable的每个元素中。
    注意这个apply是异步且无序的,也就是说依次输入a,b,c,但是
    输出可能是func(c), func(a), func(b)。结果将作为一个
    generator返回,其中每个item是输入的序号以及该输入对应的
    处理结果。
    参数:
        dummy: False是多进程/线性,True则是多线程/线性;
        random_seeds: 每个进程的随机种子。
    """
    if dummy:
        from multiprocessing.dummy import Pool, Queue
    else:
        from multiprocessing import Pool, Queue

    in_queue, out_queue, seed_queue = Queue(max_queue_size), Queue(), Queue()
    if random_seeds is True:
        random_seeds = [None] * workers
    elif random_seeds is None or random_seeds is False:
        random_seeds = []
    for seed in random_seeds:
        seed_queue.put(seed)

    def worker_step(in_queue, out_queue):
        """单步函数包装成循环执行
        """
        if not seed_queue.empty():
            np.random.seed(seed_queue.get())
        while True:
            i, d = in_queue.get()
            r = func(d)
            out_queue.put((i, r))

    # Start the worker processes/threads
    pool = Pool(workers, worker_step, (in_queue, out_queue))

    # Feed in inputs and pull out results
    in_count, out_count = 0, 0
    for i, d in enumerate(iterable):
        in_count += 1
        while True:
            try:
                in_queue.put((i, d), block=False)
                break
            except six.moves.queue.Full:
                while out_queue.qsize() > max_queue_size:
                    yield out_queue.get()
                    out_count += 1
        if out_queue.qsize() > 0:
            yield out_queue.get()
            out_count += 1

    while out_count != in_count:
        yield out_queue.get()
        out_count += 1

    pool.terminate()
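A small usage sketch for parallel_apply_generator; the square function is illustrative, and dummy=True selects the thread-backed pool.

def square(x):
    return x * x

# items come back unordered as (input index, result) pairs
for idx, value in parallel_apply_generator(square, range(10),
                                           workers=4, max_queue_size=8,
                                           dummy=True):
    print(idx, value)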
Example #28
def parallel_apply(func,
                   iterable,
                   workers,
                   max_queue_size,
                   callback=None,
                   dummy=False):
    """多进程或多线程地将func应用到iterable的每个元素中。
    注意这个apply是异步且无序的,也就是说依次输入a,b,c,但是
    输出可能是func(c), func(a), func(b)。
    参数:
        dummy: False是多进程/线性,True则是多线程/线性;
        callback: 处理单个输出的回调函数;
    """
    if dummy:
        from multiprocessing.dummy import Pool, Queue
    else:
        from multiprocessing import Pool, Queue

    in_queue, out_queue = Queue(max_queue_size), Queue()

    # Start the worker processes/threads
    pool = Pool(workers, worker_step, (func, in_queue, out_queue))

    if callback is None:
        results = []

    # Post-processing helper
    def process_out_queue():
        out_count = 0
        for _ in range(out_queue.qsize()):
            d = out_queue.get()
            out_count += 1
            if callback is None:
                results.append(d)
            else:
                callback(d)
        return out_count

    # Feed in inputs and pull out results
    in_count, out_count = 0, 0
    for d in iterable:
        in_count += 1
        while True:
            try:
                in_queue.put(d, block=False)
                break
            except six.moves.queue.Full:
                out_count += process_out_queue()
        if in_count % max_queue_size == 0:
            out_count += process_out_queue()

    while out_count != in_count:
        out_count += process_out_queue()

    pool.terminate()

    if callback is None:
        return results
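A usage sketch for parallel_apply, assuming the module-level worker_step helper it relies on is available as in the original source; the double function is illustrative.

def double(x):
    return 2 * x

results = parallel_apply(double, range(100), workers=4,
                         max_queue_size=32, dummy=True)
print(sorted(results))   # results arrive unordered, sort for display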
Example #29
class MultiprocessingPool():
    """
	Wrapper for multiprocessing Pool map
	"""
    def __init__(self, func, arg_array, workers):
        """
		Args:
			func - method to run
			arg_array - list of input for function
			workers - integer number of threads for multiprocessing
		"""
        if workers < 1:
            raise ValueError(
                "MultiprocessingPool - Workers must be >= 1: Got value '{0}'".
                format(workers))
        if not isinstance(arg_array, list):
            raise ValueError(
                "MultiprocessingPool - Expected list for arg_array, got '{0}'".
                format(type(arg_array)))
        self.__pool = ThreadPool(workers)
        self.__func = func
        self.__arg_array_iter = iter(arg_array)

    def run(self):
        """Start the multiprocessing pool map

		Returns:
			returns list of return values of function
		"""
        try:
            res = self.__pool.map(self.__run_func, self.__arg_array_iter)
            self.__pool.close()
            self.__pool.join()
            return res
        except KeyboardInterrupt:
            log.debug("Keyboard interrupt while mapping pool.")
            self.__pool.terminate()
        except Exception as e:
            log.error("Unhandled Exception in pool: {0} - {1}".format(
                str(e), [traceback.format_exc()]))
            self.__pool.terminate()

    def __run_func(self, arg):
        """Wrapper for worker functions

		Args:
			arg - argument to pass into function
		Returns:
			return value of function
		"""
        try:
            return self.__func(arg)
        except KeyboardInterrupt:
            raise RuntimeError("Keyboard Interrupt")
        except Exception as e:
            log.error("Unhandled Exception in worker: {0} - {1}".format(
                str(e), [traceback.format_exc()]))
        return
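A usage sketch for the wrapper class above; the upper function and its inputs are made up.

def upper(text):
    return text.upper()

pool = MultiprocessingPool(upper, ['a', 'b', 'c'], workers=2)
print(pool.run())   # ['A', 'B', 'C']; items whose worker raised come back as None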
Example #30
def timeout_wrapper(func, *args, **kwargs):
    timeout = kwargs.get("timeout", None)
    p = ThreadPool(1)
    res = p.apply_async(func, args=args)
    try:
        out = res.get(timeout)  # Wait timeout seconds for func to complete.
    except multiprocessing.TimeoutError:
        p.terminate()
    else:
        return out
Example #31
def abortable_worker(in_args):
    tp = ThreadPool(1)
    timeout, tup = in_args
    res = tp.apply_async(func=spinSingleInstance, args=(tup, ))
    try:
        out = res.get(timeout)  # Wait timeout seconds for func to complete.
        return out
    except Exception as e:
        tp.terminate()
        raise
Example #32
def abortable_worker(func, *args, **kwargs):
    timeout = kwargs.get('timeout', None)
    p = ThreadPool(1)
    res = p.apply_async(func, args=args)
    try:
        out = res.get(timeout)  # Wait timeout seconds for func to complete.
        return out
    except multiprocessing.TimeoutError:
        p.terminate()
        raise
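The wrapper above is usually combined with functools.partial so that an outer pool can enforce a per-task timeout; slow_job below is a hypothetical worker.

import functools
import time
from multiprocessing.dummy import Pool as ThreadPool

def slow_job(x):
    time.sleep(x)
    return x

outer = ThreadPool(4)
guarded = functools.partial(abortable_worker, slow_job, timeout=2)
try:
    print(outer.map(guarded, [1, 1, 1]))   # [1, 1, 1]; a timed-out task re-raises here
finally:
    outer.close()
    outer.join()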
Example #33
 def __call__(self, *args, **kwargs):
     from multiprocessing.dummy import Pool as ThreadPool
     p = ThreadPool(1)
     res = p.apply_async(self.func, args=args, kwds=kwargs)
     try:
         out = res.get(self.timeout)  # Wait timeout seconds for func to complete.
         return out
     except multiprocessing.TimeoutError:
         p.terminate()
         raise multiprocessing.TimeoutError(str(self.timeout))
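The __call__ above reads like part of a timeout-decorator class; a minimal sketch of such a class, matching the method shown, might look like the following (the class name and constructor are assumptions, not the original code).

import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool

class CallWithTimeout(object):
    """Hypothetical wrapper: run func in a one-worker ThreadPool, abort after `timeout` seconds."""

    def __init__(self, func, timeout):
        self.func = func
        self.timeout = timeout

    def __call__(self, *args, **kwargs):
        p = ThreadPool(1)
        res = p.apply_async(self.func, args=args, kwds=kwargs)
        try:
            return res.get(self.timeout)  # Wait timeout seconds for func to complete.
        except multiprocessing.TimeoutError:
            p.terminate()
            raise multiprocessing.TimeoutError(str(self.timeout))

# e.g. CallWithTimeout(some_io_call, timeout=5)(url) raises after 5 seconds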
Example #34
class SThreadPool(metaclass=metas.Singleton):
    def __init__(self):
        self.__thread_pool__ = Pool(SConfig().WORKERS)

    def add(self, task, args):
        self.__thread_pool__.apply_async(task, args)

    def terminate(self):
        self.__thread_pool__.terminate()
        self.__thread_pool__.join()
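A usage sketch for the singleton pool above, assuming metas.Singleton and SConfig from the original project are importable; the job function is illustrative.

import time

def job(name):
    time.sleep(1)
    print('done', name)

pool = SThreadPool()          # every call returns the same instance (Singleton metaclass)
pool.add(job, ('a',))
pool.add(job, ('b',))
time.sleep(2)                 # give the async tasks time to run
pool.terminate()              # terminate() then join() the underlying pool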
Example #35
def _multipart_upload(buf, name, file_size, client, **kwargs):
    # scale the part size based on file size
    part_size = max(int(math.sqrt(MIN_PART_SIZE) * math.sqrt(file_size)),
                    MIN_PART_SIZE)
    num_parts = int(math.ceil((file_size) / float(part_size)))

    log.debug(
        'Uploading file with %s bytes using %s file parts with a part '
        'size of %s bytes', file_size, num_parts, part_size)
    file_response = client.files.post_multipart(name=name,
                                                num_parts=num_parts,
                                                **kwargs)

    # Platform will give us a URL for each file part
    urls = file_response.upload_urls
    if num_parts != len(urls):
        raise ValueError(
            f"There are {num_parts} file parts but only {len(urls)} urls")

    # upload function wrapped with a retry decorator
    @retry(RETRY_EXCEPTIONS)
    def _upload_part_base(item, file_path, part_size, file_size):
        part_num, part_url = item[0], item[1]
        offset = part_size * part_num
        num_bytes = min(part_size, file_size - offset)

        log.debug('Uploading file part %s', part_num)
        with open(file_path, 'rb') as fin:
            fin.seek(offset)
            partial_buf = BufferedPartialReader(fin, num_bytes)
            part_response = requests.put(part_url, data=partial_buf)

        if not part_response.ok:
            msg = _get_aws_error_message(part_response)
            raise HTTPError(msg, response=part_response)

        log.debug('Completed upload of file part %s', part_num)

    # upload each part
    try:
        pool = Pool(MAX_THREADS)
        _upload_part = partial(_upload_part_base,
                               file_path=buf.name,
                               part_size=part_size,
                               file_size=file_size)
        pool.map(_upload_part, enumerate(urls))

    # complete the multipart upload; an abort will be triggered
    # if any part except the last failed to upload at least 5MB
    finally:
        pool.terminate()
        client.files.post_multipart_complete(file_response.id)

    log.debug('Uploaded File %d', file_response.id)
    return file_response.id
Example #36
 def main(self, cnt):
     self.get_proxies()
     for i in range(1, cnt + 1):
         cars_links = self.get_listing(
             'https://www.otomoto.pl/osobowe/uzywane/', i)
         pool = ThreadPool(10)
         pool.map(self.parse_car, cars_links)
         pool.terminate()
         pool.close()
         pool.join()
     print('DONE!')
Example #37
 def run(self):
     in_queue, out_queue = Queue(), Queue()
     for i in self.a:
         in_queue.put(i)
     def f(in_queue, out_queue):
         while not in_queue.empty():
             time.sleep(1)
             out_queue.put(in_queue.get()+1)
     pool = Pool(4, f, (in_queue, out_queue))
     self.b = []
     while len(self.b) < len(self.a):
         if not out_queue.empty():
             self.b.append(out_queue.get())
     pool.terminate()
Example #38
def core():
    logging.basicConfig(level=logging.WARNING)
    # check()
    prepareprocess()

    pool=ThreadPool(20)
    time1 = datetime.now()
    timeend= time1+timedelta(minutes=2)

    while True:
        time2 = datetime.now()
        if(time2>timeend):
            break
        for i in classtorush:
            pool.apply_async(rush,args=(i,))
            time.sleep(0.25)
    time.sleep(10)
    pool.terminate()
    pool.join()
Example #39
def download_pages(items):
    retry = 0
    while retry < 10:
        pool = ThreadPool(4)
        try:
            pool.map(download_page, items)
            pool.close()
            pool.join()
            break
        except KeyboardInterrupt:
            print('download terminated by user, quit execution.')
            pool.terminate()
            break
        except Exception as e:
            pool.terminate()
            retry += 1
            print('download error occurred: {0}, {1} retry in {2}s'.format(
                e, retry, retry * 10))
            time.sleep(retry * 10)
Example #40
def process_parsing(url, output_stream):
    # url='http://www.yell.ru/spb/top/restorany/'
    time_start = time.time()
    url_prefix = 'http://www.yell.ru'
    r = requests.get(url)
    metro_urls = get_metro_stations(r.text)
    logging.info("metroes: %d" % len(metro_urls))

    # collect company urls for parsing
    pool = ThreadPool(NUMBER_PROCESSES)
    res_queue = ProcessQueue()
    results = pool.map(collect_company_urls, [(url_prefix + u, res_queue) for u in metro_urls])
    pool.close()
    pool.join()
    pool.terminate()

    # reduce urls
    count = 0
    reduced_url_set = set()
    while not res_queue.empty():
        count = count + 1
        url = res_queue.get_nowait()
        # FOR TEST ONLY - REMOVE THIS
        #if count < 10:
        reduced_url_set.add(url)

    logging.info("%d" % count)
    logging.info("%d" % len(reduced_url_set))

    # start company parsing pool ----
    logging.info('start!!!!!!')
    pool = ThreadPool(NUMBER_PROCESSES)

    for field in FIELDS:
        output_stream.write(field + ';')
    output_stream.write('\n')
    results = pool.map(parse_company_worker, [(url_prefix + u, output_stream) for u in reduced_url_set])
    pool.close()
    pool.join()
    pool.terminate()
    logging.info('done!!!!!')
    logging.info('finished in %s seconds' % (time.time() - time_start))
Example #41
    def execute_nodes(self):
        num_threads = self.config.threads
        target_name = self.config.target_name

        text = "Concurrency: {} threads (target='{}')"
        concurrency_line = text.format(num_threads, target_name)
        dbt.ui.printer.print_timestamped_line(concurrency_line)
        dbt.ui.printer.print_timestamped_line("")

        pool = ThreadPool(num_threads)
        try:
            self.run_queue(pool)

        except KeyboardInterrupt:
            pool.close()
            pool.terminate()

            adapter = get_adapter(self.config)

            if not adapter.is_cancelable():
                msg = ("The {} adapter does not support query "
                       "cancellation. Some queries may still be "
                       "running!".format(adapter.type()))

                yellow = dbt.ui.printer.COLOR_FG_YELLOW
                dbt.ui.printer.print_timestamped_line(msg, yellow)
                raise

            for conn_name in adapter.cancel_open_connections():
                dbt.ui.printer.print_cancel_line(conn_name)

            pool.join()

            dbt.ui.printer.print_run_end_messages(self.node_results,
                                                  early_exit=True)

            raise

        pool.close()
        pool.join()

        return self.node_results
Example #42
class ParallelRunner(SkeletonRunner):
	"""
	This class is used to evaluate a skeleton through parallel primitives.
	This runner allows skeleton evaluation to be executed on the
	parallel architecture, using all available processing elements.
	The parallelism is exploited by evaluating the skeleton with different
	input elements in parallel and then joining all results.
	"""

	def __init__(self):
		self.pool = Pool(processes=36)
		
	def __del__(self):
		self.pool.close()
		self.pool.terminate()

	def run(self, skeleton, values, *params):
		results = self.pool.map(eval_parallel(skeleton, params[1], params[0]), values)
		self.pool.close()
		return results
Example #43
    def _fetch_photos_multi(self):
        rows = self.db.get_photo_status()
        if not rows:
            print('No photos found in the albums of {0}'.format(self.target_id))
            return
        photos = []
        for row in rows:
            photos.append(json.loads(row['data']))

        count = len(photos)
        print("正在下载第{0}-{1}张照片 ...".format(
            self.photo_total, self.photo_total+count))
        pool = ThreadPool(8)
        try:
            pool.map(self._download_photo, photos)
            pool.close()
            pool.join()
            self.photo_total += count
        except KeyboardInterrupt:
            pool.terminate()
Example #44
def fetch_or_load_urls(filename):
    if os.path.exists(jsonfile):
        return json.load(open(jsonfile, 'r'))
    pool = ThreadPool(8)
    try:
        pool.map(findurls, range(1, 51))
        pool.close()
        pool.join()

    except KeyboardInterrupt:
        print('terminated by user.')
        pool.terminate()

    print(len(urls))

    items = [url_to_item(url) for url in sorted(urls, cmp=url_cmp)]
    # json.dump(items, open('urls.json', 'w'), indent=2)  # writes \uxxxx escapes
    json.dump(items, codecs.open(filename, 'w', 'utf8'),  # write Chinese characters as-is
              ensure_ascii=False, indent=2)

    return items
Example #45
 def start(self):
     while self.retry > 0:
         pool = ThreadPool(self.pool_size)
         try:
             pool.map(self.func, self.args)
             pool.close()
             pool.join()
             print('task execution completely.')
             break
         except KeyboardInterrupt as e:
             print('task terminated by user.', e)
             pool.terminate()
             pool.join()
             break
         except Exception as e:
             pool.terminate()
             pool.join()
             self.retry -= 1
             traceback.print_exc()
             print('task error: {0}, {1} retry in {2}s'.format(
                 e, sys.maxsize - self.retry, 60))
             time.sleep(self.sleep)
Example #46
def get_all_urls_from_cat_multithread(category_param,
                                      nb_threads=concurrent_limit):
    """ Given a category URL param,
    returns the list of urls for all names in it.
    """
    # to get by letter, add parameter: &lettre=^[aA]
    logger.info('Extracting URLs for category parameter: %s',
                category_param)

    letters = string.ascii_lowercase
    pool = ThreadPool(nb_threads)
    try:
        # Lambda function for multiprocessing
        get_url_func = lambda x: get_letter_urls(x, category_param)
        results = pool.map(get_url_func, letters)
        pool.close()
        pool.join()
        # Results is a list of lists, make only one list
        urls = [url for l in results for url in l]
        return urls
    except KeyboardInterrupt:
        pool.terminate()
Example #47
class SimpleDesktopsDownloader:
    def __init__(
        self,
        output="simpledesktops",
        max_threads=5,
        force=False,
        tree=False,
        logger=None,
    ):
        if not os.path.exists(output):
            os.mkdir(output)
        self.output = output
        self.pool = Pool(max_threads)
        self.force = force
        self.tree = tree

        self.session = requests.Session()
        self.skip_count = 0
        self.update_count = 0

        self.logger = logger or logging.getLogger()

    def join(self, path):
        return os.path.join(self.output, path)

    def download_job(self, img):
        # .295x184_q100.png
        img_url = img["src"][:-17]
        self.logger.info("start job: %s", img_url)
        # /uploads/desktops/
        path = urlparse(img_url).path[18:]
        if self.tree:
            dir = path.rsplit("/", 1)[0]
            os.makedirs(dir, exist_ok=True)
        else:
            path = path.replace("/", "-")

        path = self.join(path)
        if not self.force and os.path.exists(path):
            self.skip_count += 1
            self.logger.info("%s already exists! skip downloading ...", path)
            return
        with click.open_file(path, "wb") as fp:
            r = self.session.get(img_url)
            if not r.ok:
                self.logger.error("something wrong! [%d]%s", r.status_code, img_url)
                self.pool.terminate()
                return
            fp.write(r.content)
            self.update_count += 1
            self.logger.info("%s successfully downloaded.", path)

    def iter_download_job(self):
        url = "http://simpledesktops.com/browse/"
        page = 1
        while True:
            response = self.session.get(url + str(page))
            if response.status_code == 404:
                break
            page += 1
            bs = BeautifulSoup(response.text, "html.parser")
            for img in bs.select(".desktops .edge .desktop img"):
                yield img

    def download(self):
        self.logger.info("dispatching download jobs ...")
        self.pool.map(self.download_job, self.iter_download_job())
        self.logger.info(
            "all task done, %d updated, %d skipped, enjoy!",
            self.update_count,
            self.skip_count,
        )
Example #48
def mergeFilesByRegion(filesByRegion, grid, outputDir):
    # Merge a set of files by region into the specified dir
    # Key is up/down/nominal etc
    N = 0
    filesToWrite = {}
    for r in filesByRegion:
        for key in filesByRegion[r]:
            if filesByRegion[r][key] == []:
                if key == "Nominal":
                    print ("WARNING: no input files for region {0} key {1}".format(r, key))
                continue

            filePrefix = "%s_%s" % (r, grid)
            filename = os.path.join(outputDir, "%s.root" % (filePrefix))
            if os.path.exists(filename):
                print ("Output file {0} exists - skipping".format(os.path.basename(filename)))
                continue

            filesToWrite[filename] = {"region": r, "files": filesByRegion[r][key]}
            N += 1

    # Got anything?
    if filesToWrite == {}:
        return

    # build the pool arguments
    args = []
    for filename in filesToWrite:
        N -= 1
        args.append((filename, filesToWrite[filename]["files"], False, filesToWrite[filename]["region"], N))

    pool = ThreadPool(8, init_worker)
    try:
        # results = pool.map(mergeFiles, args)
        results = pool.imap_unordered(mergeFiles, args)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print "Caught KeyboardInterrupt, terminating workers"
        pool.terminate()
        pool.join()

    return

    # The legacy code below, which relies on hadd, is to be removed

    for r in filesByRegion:
        for key in filesByRegion[r]:
            if filesByRegion[r][key] == []:
                continue

            N -= 1

            # Merge the files in chunks of 50, and then merge these chunks

            # The whole idea behind this exercise is to avoid exceeding the maximum length of
            # of a command allowed in bash.

            filePrefix = "%s_%s" % (r, grid)
            filename = os.path.join(outputDir, "%s.root" % (filePrefix))
            if os.path.exists(filename):
                print ("Output file {0} exists - skipping".format(os.path.basename(filename)))
                continue

            mergeFiles(filename, filesByRegion[r][key])

            # fileMerger = ROOT.TFileMerger()
            # fileMerger.OutputFile(filename)
            # for f in filesByRegion[r][key]:
            #    fileMerger.AddFile(f)
            # fileMerger.Merge()

            # i=1
            # print("Attempting to make file {0}".format(filename))
            # for subset in chunks(filesByRegion[r][key], 50):
            #    print("Merging subset {0:d}...".format(i))
            #    filename = os.path.join(outputDir, "%s_%03d.root" % (filePrefix, i) )
            #    outputFiles.append(filename)
            #
            #    if len(subset) == 1:
            #        shutil.copy(subset[0], filename)
            #    else:
            #        cmd = "hadd -f %s %s" % (filename, " ".join(subset))
            #        subprocess.call(cmd, shell=True)
            #
            #    i+=1

            # print("Merging all subsets")
            # filename = os.path.join(outputDir, "%s.root" % (filePrefix) )

            # if len(outputFiles) == 1:
            #    # only 1 file, so just rename it
            #    os.rename(outputFiles[0], filename)
            # else:
            #    cmd = "hadd -f %s %s" % (filename, " ".join(outputFiles))
            #    subprocess.call(cmd, shell=True)

            # print("Done merging subsets; removing temporary files")
            # for f in outputFiles:
            #    if not os.path.exists(f): continue
            #    os.remove(f)

            print ("=> Created file for {0}; {1} files remaining".format(r, N))
Example #49
class TextService(QtCore.QObject):
    """
    A TextService which handles all text processing including the fetching of images and voice
    """
    change_img = QtCore.Signal()

    def __init__(self, text, window, lang_en, def_counter):
        """
        :param text:
           Complete tale/story
        :param window:
            Story_UI window
        """
        QtCore.QObject.__init__(self)
        self.word_list = re.split('\s', text)
        self.window = window
        self.sentence_list = regex.split("(?V1)(?<=\.|:|;|-|,|\!)", text)
        self.sentence_list = self.join_short_sentences()
        self.keyword_list = []
        self.timing_list = []

        self.pool = ThreadPool(4)
        self.keyword_list = self.pool.map(derive_keyword, self.sentence_list)
        self.pool.close()
        self.pool.join()

        self.audio_service = AudioService(window)
        self.audio_thread = threading.Thread(target=self.audio_service.prepare_voice,
                                             args=(self.sentence_list, def_counter))
        self.audio_thread.setDaemon(True)
        self.audio_thread.start()
        self.image_thread = threading.Thread(target=image_from_keyword_list, args=(self.keyword_list, window, lang_en))
        self.image_thread.setDaemon(True)
        self.image_thread.start()
        # subtitle_thread = threading.Thread(target=window.set_subtitles, args=())
        # subtitle_thread.setDaemon(True)
        # subtitle_thread.start()

    def start_story(self, wait_seconds=5):
        """
        Starts the story telling but waits a few seconds (to preload some data)
        """
        self.audio_service.set_clip_callback(self.window.switch_to_next_image)
        sleep(wait_seconds)
        self.audio_service.start_audio()

    def get_sentence_list(self):
        return self.sentence_list

    def pause_play(self):
        """
        Pauses the audio or ends the Pause
        """
        self.audio_service.pause_play()

    def stop_play(self):
        """
        Stops the Story. Used for restart.
        """
        self.pool.terminate()
        self.audio_service.stop_play()

    def join_short_sentences(self):
        result_list = []
        for sentence in self.sentence_list:
            if len(sentence.split()) > 4:
                result_list.append(sentence)
            else:
                try:
                    result_list[-1] = result_list[-1] + sentence
                except IndexError:
                    result_list.append(sentence)
        return result_list
Example #50
from multiprocessing import Pool, Process
from multiprocessing.dummy import Pool as ThreadPool
import time


def test(pair):
    a, b = pair
    print('test %s %s' % (a, b))
    p = ThreadPool()
    p.map_async(time.sleep, [3, 2, 1]).get(120)
    return a ** 2, b ** 2

if __name__ == '__main__':

    # p = Pool()
    # import pdb;pdb.set_trace()
    try:
        p = ThreadPool()
        # results = p.map(test, [(1,2), (3,4)])
        results = p.map_async(test,
                              [(1, 2), (3, 4), (4, 4), (5, 4), (6, 4), (7, 4)]).get(120)
        # results = p.map_async(test, [1, 3], [2, 4]).get()
        p.close()
        p.join()
        print(results)
    except KeyboardInterrupt:
        print('Catch KeyboardInterrupt in main')
        p.terminate()
        # p.join()
    # for item in results:
    #     print item.get()
    # results = tp.map(test, [(1,2), (3,4)])
Example #51
if __name__ == '__main__':
    
    switch, user, command, debug, parameters, logger , key_based = get_argument()
    debug_logging(debug)
    
    if parameters != None:
        parameters=parameters_validate(parameters, len(switch))
    
    command = command_to_send(command,parameters,len(switch))
    logging.debug("User: %s\nNetwork devices: %s\nCommands: %s\nDebug: %s\nLogging: %s\n"%(user,switch,command,debug,logger))
    
    if key_based:
        passwd=''
    else:
        passwd = getpass.getpass('Please provide password for user %s:'%user)
    
    data_holder = zip(switch, command, itertools.repeat(user), itertools.repeat(passwd), itertools.repeat(logger))
    pool = ThreadPool() 
    
    try:
        pool.map(connection_star_thread,data_holder)
        pool.close()
        pool.join()
        gracefully_exit()
        
    except KeyboardInterrupt:
        logging.error("Caught keyboard interrupt. Killing all threads.")
        pool.terminate()
        pool.join()
        gracefully_exit()
Example #52
def _multipart_upload(syn, filename, contentType, get_chunk_function, md5, fileSize,
                      partSize=None, **kwargs):
    """
    Multipart Upload.

    :param syn: a Synapse object
    :param filename: a string containing the base filename
    :param contentType: contentType_
    :param get_chunk_function: a function that takes a part number and size
                               and returns the bytes of that chunk of the file
    :param md5: the part's MD5 as hex.
    :param fileSize: total number of bytes
    :param partSize: number of bytes per part. Minimum 5MB.

    :return: a MultipartUploadStatus_ object

    Keyword arguments are passed down to :py:func:`_start_multipart_upload`.

    .. MultipartUploadStatus: http://rest.synapse.org/org/sagebionetworks/repo/model/file/MultipartUploadStatus.html
    .. contentType: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    """
    partSize = calculate_part_size(fileSize, partSize, MIN_PART_SIZE, MAX_NUMBER_OF_PARTS)
    status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType, **kwargs)

    ## only force restart once
    kwargs['forceRestart'] = False

    completedParts = count_completed_parts(status.partsState)
    progress=True
    retries=0
    mp = Pool(8)
    try:
        while retries<MAX_RETRIES:
            ## keep track of the number of bytes uploaded so far
            completed = Value('d', min(completedParts * partSize, fileSize))
            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename)
            chunk_upload = lambda part: _upload_chunk(part, completed=completed, status=status, 
                                                      syn=syn, filename=filename,
                                                      get_chunk_function=get_chunk_function,
                                                      fileSize=fileSize, partSize=partSize, t0=time.time())

            url_generator = _get_presigned_urls(syn, status.uploadId, find_parts_to_upload(status.partsState))
            mp.map(chunk_upload, url_generator)

            #Check if there are still parts
            status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType, **kwargs)
            oldCompletedParts, completedParts = completedParts, count_completed_parts(status.partsState)
            progress = (completedParts>oldCompletedParts)
            retries = retries+1 if not progress else retries

            ## Are we done, yet?
            if completed.value >= fileSize:
                try:
                    status = _complete_multipart_upload(syn, status.uploadId)
                    if status.state == "COMPLETED":
                        break
                except Exception as ex1:
                    sys.stderr.write(str(ex1)+"\n")
    finally:
        mp.terminate()
    if status["state"] != "COMPLETED":
        raise SynapseError("Upoad {id} did not complete. Try again.".format(id=status["uploadId"]))

    return status