def main():
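    # Assumes module-level imports not shown in this snippet: sys, time, random,
    # queue, gevent.pool.Pool, dry.logger, settings, plus the helper functions
    # nextend() and load_url() defined elsewhere in the project.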
    #global qtimes,qerr
    usage="<program> times_to_extend_url_list processes iterations"
    if len(sys.argv) < 4:
        print("Not enough arguments.  "+usage)
        return        

    times_to_extend_url_list = int(sys.argv[1])
    processes=int(sys.argv[2])
    iterations=int(sys.argv[3])

    urls = nextend(settings.URLS, times_to_extend_url_list)
    random.shuffle(urls)
    n = len(urls)
    timeout_secs=max(n/10, 60)

    logger = dry.logger.setup_log_size_rotating("log/urlclient_gevent_pool.log", 
                                                logname="urlclientgeventpool")

    logger.info("START|times_to_extend_url_list:{i}|processes:{p}|timeout:{t}|urls:{n}|type:gevent".format(i=times_to_extend_url_list, 
                                                                                                           p=processes,
                                                                                                           t=timeout_secs,
                                                                                                           n=n))

    pool = Pool(max(processes, 4))
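    # max(processes, 4) sizes the greenlet pool to the requested worker count,
    # but never fewer than 4.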
    qurltime = queue.Queue()
    elapsed_time = []
    for i in range(iterations):
        start = time.time()
        try:
            jobs = [pool.spawn(load_url, url, logger, logstatus=True) for url in urls]
#            jobs = [pool.spawn(load_url, url, logger, timeout=60, logstatus=True) for url in urls]
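            # Block until every spawned greenlet finishes, or give up after timeout_secs.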
            pool.join(timeout=timeout_secs)
        except Exception as e:
            logger.error(e)

        elapsed_time.append(time.time() - start)

    elapsed_time_str = ""
    for t in elapsed_time:
        elapsed_time_str += str(t)+","
    elapsed_time_str = elapsed_time_str.rstrip(",")

    print("g,{u},{np},{et}".format(u=len(urls), np=processes, et=elapsed_time_str))
def main():
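    # Splits settings.BIG_FILE into byte ranges and scans each range for
    # settings.TARGET_USERNAME on a thread pool, summing the per-chunk match
    # counts (find_noq presumably returns the number of matches in its chunk).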
    start = time.time()
    logger = dry.logger.setup_log_size_rotating("log/bigfile_futures_threadpool.log", 
                                                logname='bigfilefuturesthreads')
    
    logger.info("START")
    elapsed_time = []
    
    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        #A list of tuples (chunk_start, chunk_size)
        chunks = size_chunks(fh, fsize, num_chunks=settings.BIGFILE_FUTURES_CHUNKS)
    
    pattern = re.compile(settings.TARGET_USERNAME)
    file_handles = []
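    # Open one independent handle per chunk so each worker can seek and read
    # without contending for a shared file position.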
    for j in range(len(chunks)):
        file_handles.append(open(sfile, "r"))
    
    with futures.ThreadPoolExecutor(max_workers=settings.BIGFILE_FUTURES_CHUNKS) as executor:
        future_to_chunk = dict( (executor.submit(find_noq, file_handles[i], chunks[i], pattern), "") \
                                for i in range(len(chunks)) )
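    # Leaving the "with" block waits for every submitted future to finish,
    # so as_completed() below yields futures that are already done.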
        
    recsmatch = 0    
    
    try:
        for future in futures.as_completed(future_to_chunk, timeout=60):
            recsmatch += future.result()
    except Exception as e:
        #traceback.print_exc(file=sys.stdout)
        logger.error("recsmatch={m} e={e}".format(m=recsmatch, e=e))
        return
            
    elapsed_time.append(time.time() - start)
            
    elapsed_time_str = ""
    for t in elapsed_time:
        elapsed_time_str += str(t)+","
    elapsed_time_str = elapsed_time_str.rstrip(",")
    
    print("{r}".format(r=recsmatch))
    logger.info("STOP|elapsedtime:{et}|recsmatch:{r}".format(et=elapsed_time, r=recsmatch))
def main():
    usage="urlclient_futures iterations"
    if len(sys.argv) < 2:
        print(usage)
        return
        
    iterations = int(sys.argv[1])
    urls = nextend(settings.URLS, iterations)
    n = len(urls)
    timeout_secs=max(n/4, 60)
    
    logger = dry.logger.setup_log_size_rotating("log/urlclient_gevent_individual.log", 
                                                logname='urlclientgeventindividual')
    
    logger.info("iterations={i} timeout={t} n={n} type=gevent".format(i=iterations, 
                                                                  t=timeout_secs,
                                                                  n=n))
    
    jobs = [gevent.spawn(load_url, url, logger, timeout=60, 
                         logstatus=True) for url in urls]
    gevent.joinall(jobs, timeout=timeout_secs)  
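    # joinall() blocks until every greenlet finishes or timeout_secs elapses,
    # whichever comes first.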
def main():
    usage="urlclient_futures t|p times_to_extend_url_list processes iterations"
    
    if len(sys.argv) < 5:
        print("Not enough arguments.  "+usage)
        return
    
    if sys.argv[1].lower() == "t":
        process_type="threads"
    elif sys.argv[1].lower() == "p":
        process_type = "mp"
    else:
        print("Must specify t or p for thread or process as first argument.")
        print(usage)
        return
    
    times_to_extend_url_list = int(sys.argv[2])
    processes=int(sys.argv[3])
    iterations = int(sys.argv[4])
 
    urls = nextend(settings.URLS, times_to_extend_url_list)
    n = len(urls)
    timeout_secs=max(n*3, 60)
    logger = dry.logger.setup_log_size_rotating("log/urlclient_futures.log", 
                                                logname='urlclientfutures')
    
    logger.info("times_to_extend_url_list={i} processes={p} timeout={t} n={n} type={y}".format(i=times_to_extend_url_list, 
                                                                  p=processes,
                                                                  t=timeout_secs,
                                                                  n=n,
                                                                  y=process_type))
    elapsed_time = []
    for i in range(iterations):
        j = 0
        url = ""
        future_to_url = {}
        start = time.time()
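        # Submit every URL and map each future back to its URL so failures
        # can be attributed in the error log below.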
        if process_type == "threads":
            with futures.ThreadPoolExecutor(max_workers=processes) as executor:
                future_to_url = dict( (executor.submit(load_url, url, logger, timeout_secs=30,logstatus=True), url) for url in urls )
        else:
            #urls=urls[:9]
            try:
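                # A process pool requires picklable arguments, which is
                # presumably why load_url_mp is called here without the logger.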
                with futures.ProcessPoolExecutor(max_workers=processes) as executor:
                    future_to_url = dict( (executor.submit(load_url_mp, url, timeout_secs=10), url) for url in urls )
            except Exception as e:
                print(e)
                #logger.error(e)
                
        try:
            for future in futures.as_completed(future_to_url, timeout=timeout_secs):
                j += 1
                url = future_to_url[future]
        except Exception as e:
            #traceback.print_exc(file=sys.stdout)
            logger.error("j={j} url={u} e={e}".format(j=j, e=e, u=url))
            
        elapsed_time.append(time.time() - start)
            
    elapsed_time_str = ""
    for t in elapsed_time:
        elapsed_time_str += str(t)+","
    elapsed_time_str = elapsed_time_str.rstrip(",")
    
    print("{pt},{u},{np},{et}".format(pt=process_type, u=len(urls), np=processes,
                                      et=elapsed_time_str))