def test_api(client_key):
    """Load-test the restb.ai demo API.

    Builds one job per (demo image, model) pair, fans the work out over a
    pool of worker processes, waits for the job queue to drain, shuts the
    workers down with poison pills, prints aggregate throughput stats and
    returns the queue of accumulated results.
    """
    # 1. Build the work queue and the result queue.
    urls = ['https://demo.restb.ai/images/demo/demo-1.jpg',
            'https://demo.restb.ai/images/demo/demo-2.jpg',
            'https://demo.restb.ai/images/demo/demo-3.jpg',
            'https://demo.restb.ai/images/demo/demo-4.jpg',
            'https://demo.restb.ai/images/demo/demo-5.jpg',
            'https://demo.restb.ai/images/demo/demo-6.jpg']
    jobs = mp.Queue()
    next_id = 1
    for image_url in urls:
        for model in __MODELS.keys():
            jobs.put({'id': next_id, 'url': image_url, 'model': model})
            next_id += 1
    results = mp.Queue()

    # 2. Pick which API endpoint to use (US vs. EU).
    url = __URL_US

    # 3. Shared, process-safe statistics objects.
    lock_stats = mp.Lock()
    counter = mp.Value('i', 0)
    avg_req_time = mp.Value('f', 0)
    time_start = mp.Value('f', 999999999999999)
    time_end = mp.Value('f', 0)

    # 4. Spawn one worker process per request-per-second slot.
    workers = [
        mp.Process(target=image_process_thread,
                   args=(url, client_key, jobs, results, lock_stats,
                         counter, avg_req_time, time_start, time_end))
        for _ in range(__requests_per_second)
    ]
    for worker in workers:
        worker.start()

    # 5. Wait for the queue to drain, then seed one poison pill per worker
    #    so every process exits cleanly before we join.
    while not jobs.empty():
        time.sleep(1)
    for _ in workers:
        jobs.put({'id': -1, 'url': 'shutdown', 'model': 'shutdown'})
    for worker in workers:
        worker.join()

    # 6. Report aggregate stats and hand back the accumulated results.
    total = time_end.value - time_start.value
    print('[{requests}] requests processed in [{seconds}] seconds with average time [{time}] ms, total throughput: [{throughput}] rps'.format(
        requests=counter.value,
        seconds=str(round(total / 1000.0, 1)),
        time=str(round(avg_req_time.value / counter.value, 0)),
        throughput=str(round(counter.value / (total / 1000.0), 2))
    ))
    return results
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, logger=None, connect_default_logger=False):
    """Prepare a download job: resolve mirrors, destination path, logger
    and shared state, then create the worker thread pool.

    :param urls: a single url string or a list of mirror urls; the first
        mirror becomes the active url, the rest are kept as fallbacks.
    :param dest: destination path; defaults to <tempdir>/pySmartDL/<fn>.
        A trailing path separator means "treat as a directory".
    :param progress_bar: show a textual progress bar while downloading.
    :param fix_urls: percent-escape each mirror url via utils.url_fix.
    :param logger: explicit logger; overrides connect_default_logger.
    :param connect_default_logger: use the package debugging logger.
    """
    # Normalize to a list of mirrors and pop the first as the active url.
    # NOTE(review): `basestring` implies this variant targets Python 2.
    self.mirrors = [urls] if isinstance(urls, basestring) else urls
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)

    # Resolve the destination path from the url's basename.
    fn = os.path.basename(urlparse(self.url).path)
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        # dest was given as a directory (trailing separator): remove a
        # conflicting *file* of that name, then append the filename.
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar

    # Logger precedence: explicit logger > debugging logger > no-op logger.
    if logger:
        self.logger = logger
    elif connect_default_logger:
        self.logger = utils.create_debugging_logger()
    else:
        self.logger = utils.DummyLogger()

    self.headers = {'User-Agent': utils.get_random_useragent()}
    self.threads_count = 3  # dropped to 1 below if ranges are unsupported
    self.timeout = 4
    self.current_attemp = 1   # [sic] spelling kept — attribute names are public
    self.attemps_limit = 4
    self.minChunkFile = 1024**2*2 # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []
    self.post_threadpool_thread = None
    self.control_thread = None

    # Ensure the destination folder exists.
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.debug('Folder "%s" does not exist. Creating...' % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    # Multi-threaded ranges require server-side HTTPRange support.
    if not utils.is_HTTPRange_supported(self.url):
        self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
        self.threads_count = 1

    if os.path.exists(self.dest):
        self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)
    # NOTE(review): the destination directory was already created above, so
    # this re-check can only fire if it vanished meanwhile — likely dead code.
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.warning('Directory "%s" does not exist. Creating it...'
                            % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
def __init__(self, urls, dest=None, max_threads=5, show_output=True, logger=None):
    """Set up a download job.

    ``urls`` may be one url string or a list of mirror urls; the first
    entry becomes the active url.  ``dest`` defaults to a file named after
    the last url path segment inside ``config.temp_dir``.
    """
    # Normalize to a mirror list; urls containing spaces are escaped
    # in place (the caller's list object is mutated, as before).
    self.mirrors = [urls] if isinstance(urls, basestring) else urls
    for idx, mirror in enumerate(self.mirrors):
        if " " in mirror:
            self.mirrors[idx] = utils.url_fix(mirror)
    self.url = self.mirrors.pop(0)

    # Default destination: <config.temp_dir>\<last path segment of url>.
    filename = urlparse(self.url).path.split('/')[-1]
    self.dest = dest or "%s\\%s" % (config.temp_dir, filename)

    self.show_output = show_output
    self.logger = logger if logger else logging.getLogger('dummy')
    self.max_threads = max_threads
    self.headers = config.generic_http_headers
    self.timeout = 4
    self.current_attemp = 1
    self.attemps_limit = 4
    self.minChunkFile = 1024 * 1024  # 1MB
    self.filesize = 0
    # Thread-shared byte counter used for progress reporting.
    self.shared_var = multiprocessing.Value(c_int, 0)
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self.post_threadpool_thread = None
    self.control_thread = None

    # Fall back to a single thread when the server cannot serve byte ranges.
    if not is_ServerSupportHTTPRange(self.url):
        self.logger.warning(
            "Server does not support HTTPRange. max_threads is set to 1.")
        self.max_threads = 1

    if os.path.exists(self.dest):
        self.logger.warning(
            "Destination '%s' already exists. Existing file will be removed." % self.dest)
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.warning(
            "Directory '%s' does not exist. Creating it..." % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    self.pool = ThreadPool(max_threads=self.max_threads,
                           catch_returns=True, logger=self.logger)
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5,
             timeout=5, logger=None, connect_default_logger=False,
             request_args=None, verify=True, cookie_file=None):
    """Prepare a download job: resolve mirrors, destination path, logger,
    request arguments and shared state, then create the thread pool.

    :param urls: a single url string or a list of mirror urls; the first
        mirror becomes the active url, the rest are kept as fallbacks.
    :param dest: destination path; defaults to <tempdir>/pySmartDL/<fn>.
        A trailing path separator means "treat as a directory".
    :param progress_bar: show a textual progress bar while downloading.
    :param fix_urls: percent-escape each mirror url via utils.url_fix.
    :param threads: worker thread count (forced to 1 without HTTPRange).
    :param timeout: per-request timeout in seconds.
    :param logger: explicit logger; overrides connect_default_logger.
    :param connect_default_logger: use the package debugging logger.
    :param request_args: extra urlopen arguments; a "headers" dict inside
        it gets a random User-Agent added when none is present.  The
        caller's dict is no longer mutated (it used to be).
    :param verify: when False, disable TLS certificate verification.
    :param cookie_file: optional cookies file path, stored as-is.
    """
    self.cookie_file = cookie_file

    # Logger precedence: explicit logger > debugging logger > no-op logger.
    if logger:
        self.logger = logger
    elif connect_default_logger:
        self.logger = utils.create_debugging_logger()
    else:
        self.logger = utils.DummyLogger()

    # BUGFIX: copy request_args (and its headers dict) so injecting the
    # default User-Agent never mutates the objects the caller passed in.
    if request_args:
        self.requestArgs = dict(request_args)
        self.requestArgs["headers"] = dict(request_args.get("headers") or {})
    else:
        self.requestArgs = {"headers": dict()}
    if "User-Agent" not in self.requestArgs["headers"]:
        self.requestArgs["headers"]["User-Agent"] = utils.get_random_useragent()

    # Normalize to a list of mirrors and pop the first as the active url.
    self.mirrors = [urls] if isinstance(urls, str) else urls
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)
    self.logger.info('Using url "{}"'.format(self.url))

    # Resolve the destination path from the url's (unquoted) basename.
    fn = urllib.parse.unquote(
        os.path.basename(urllib.parse.urlparse(self.url).path))
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        # dest was given as a directory (trailing separator): remove a
        # conflicting *file* of that name, then append the filename.
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar
    self.threads_count = threads
    self.timeout = timeout
    self.current_attemp = 1   # [sic] spelling kept — attribute names are public
    self.attemps_limit = 4
    self.minChunkFile = 1024**2 * 2  # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(
        c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []
    self.post_threadpool_thread = None
    self.control_thread = None

    # Ensure the destination folder exists.  exist_ok avoids the
    # check-then-create race, and makes the later re-check (which was
    # dead code — the folder always existed by then) unnecessary.
    dest_dir = os.path.dirname(self.dest)
    if not os.path.exists(dest_dir):
        self.logger.info('Folder "{}" does not exist. Creating...'.format(dest_dir))
    os.makedirs(dest_dir, exist_ok=True)

    # Multi-threaded ranges require server-side HTTPRange support.
    if not utils.is_HTTPRange_supported(self.url, timeout=self.timeout):
        self.logger.warning(
            "Server does not support HTTPRange. threads_count is set to 1.")
        self.threads_count = 1

    if os.path.exists(self.dest):
        self.logger.warning(
            'Destination "{}" already exists. Existing file will be removed.'
            .format(self.dest))

    self.logger.info("Creating a ThreadPool of {} thread(s).".format(
        self.threads_count))
    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)

    # verify=True keeps urllib's default certificate checks (context=None);
    # verify=False builds an explicit no-verification SSL context.
    if verify:
        self.context = None
    else:
        self.context = ssl.create_default_context()
        self.context.check_hostname = False
        self.context.verify_mode = ssl.CERT_NONE
import os
import urllib2  # Python 2 HTTP client — this module predates urllib.request
import time
import multiprocessing.dummy as multiprocessing  # thread-backed "multiprocessing" API
import string
from random import choice
import socket
from ctypes import c_int
import tempfile
import dummy
from logger import log

"Smart Downloading Module. Written by Itay Brandes."

# Module-wide shared counter of bytes downloaded so far, updated by the
# download workers (thread-shared since multiprocessing is the dummy alias).
shared_bytes_var = multiprocessing.Value(
    c_int, 0)  # a ctypes var that counts the bytes already downloaded


def DownloadFile(url, path, startByte=0, endByte=None, ShowProgress=True):
    '''
    Function downloads file.
    @param url: File url address.
    @param path: Destination file path.
    @param startByte: Start byte.
    @param endByte: End byte. Will work only if server supports HTTPRange headers.
    @param ShowProgress: If true, shows textual progress bar.
    @return path: Destination file path.
    '''
    # Minimal url escaping: only spaces are percent-encoded.
    url = url.replace(' ', '%20')
    headers = {}
    # NOTE(review): the function body appears truncated in this view — the
    # actual request/streaming logic that writes to `path` is not visible.
# Hard-coded table of 37-character strings mixing hex digits and dashes —
# presumably key/permutation material for an anti-scraping token scheme
# (TODO confirm against the code that indexes into SOGOU).
SOGOU = [
    '6b-f2--5-----ec---98-------034d1a-7--',
    '18-------b9fd7-c--203-------46-a-5e--',
    '-6-0--3----d-ea---f43b-7-c-8-219----5',
    '-70d--ac----f6-e-4b5--9-----3----21-8',
    '--3-1--45-c-7--2-a-e--b-8-6--0d----f9',
    '----3d658----b4------a01c9-27-e---f--',
    '8---d-7--4-f---2e9--5-1---63--b--ca-0',
    'd49---5---c-6b----a-3------207--8f1-e',
    '7---65-------0-----f--9--12-4e8cbad-3',
    '1-8--fl52--9----e-d4-b-a-c--3--0--7-6']

# Cross-worker coordination objects.  NOTE(review): `mt` is presumably a
# threading / multiprocessing alias imported above this view — verify.
BREAK_EVENT = mt.Event()
_LOCK = mt.Lock()
_DONE = mt.Value('i', 0)   # counter — name suggests completed items
_FAIL = mt.Value('i', 0)   # counter — name suggests failed items
_EMPTY = mt.Value('i', 0)  # counter — name suggests empty results

logger = src.util.loginit.get_logger('tyc2')


def get_login():
    # Login endpoint plus hard-coded credentials for tianyancha.com.
    url = 'https://www.tianyancha.com/cd/login.json'
    login_json = {'mobile': '13606181270', 'cdpassword': '******', 'loginway': 'PL', 'autoLogin': True}
    # NOTE(review): what follows is an unterminated triple-quoted string
    # (a commented-out alternate credentials dict) that continues past
    # this view — the rest of the function is not visible here.
    """
    login_json = {'mobile': '18361296750',
from random import choice
import socket
from ctypes import c_int
import tempfile
import requests
import re
import sys
import dummy

# change this
url = "https://url/file"  # placeholder target url
headers = {'user-agent': 'sirmx/0.0.1'}

# Shared counter of bytes downloaded so far.
# NOTE(review): `multiprocessing`, `urllib2` and `time` are used below but
# not imported in this view — presumably imported above this chunk; verify.
shared_bytes_var = multiprocessing.Value(c_int, 0)


def DownloadFile(url, path, startByte=0, endByte=None, ShowProgress=True):
    # Minimal url escaping: only spaces are percent-encoded.
    url = url.replace(' ', '%20')
    headers = {}
    if endByte is not None:
        # Request only the given byte range (requires server HTTPRange support).
        headers['Range'] = 'bytes=%d-%d' % (startByte,endByte)
    req = urllib2.Request(url, headers=headers)
    try:
        urlObj = urllib2.urlopen(req, timeout=4)
    except urllib2.HTTPError, e:  # Python 2 except syntax
        # 416 (Requested Range Not Satisfiable): wait 5s and retry the
        # same call recursively.
        if "HTTP Error 416" in str(e):
            print " Retrying..."
            time.sleep(5)
            return DownloadFile(url, path, startByte, endByte, ShowProgress)
    # NOTE(review): the function appears truncated in this view — nothing
    # is written to `path` in the visible portion.
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5,
             logger=None, connect_default_logger=False, proxy=None):
    """Prepare a download job: resolve mirrors, optional proxy, destination
    path, logger and shared state, then create the thread pool.

    :param urls: a single url string or a list of mirror urls; the first
        mirror becomes the active url, the rest are kept as fallbacks.
    :param dest: destination path; defaults to <tempdir>/pySmartDL/<fn>.
        A trailing path separator means "treat as a directory".
    :param progress_bar: show a textual progress bar while downloading.
    :param fix_urls: percent-escape each mirror url via utils.url_fix.
    :param threads: worker thread count (forced to 1 without HTTPRange).
    :param logger: explicit logger; overrides connect_default_logger.
    :param connect_default_logger: use/create the shared debugging logger.
    :param proxy: proxy url applied to both http and https.
    """
    global DEFAULT_LOGGER_CREATED
    # Normalize to a list of mirrors and pop the first as the active url.
    # NOTE(review): `basestring` with the sys.version_info check below
    # suggests this variant straddles Python 2 and 3.
    self.mirrors = [urls] if isinstance(urls, basestring) else urls
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)

    # NOTE(review): install_opener mutates process-wide urllib2 state —
    # every later urlopen in this process goes through this proxy.
    if proxy is not None:
        proxy = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    # Resolve the destination path from the url's (unquoted) basename.
    fn = urllib2.unquote(os.path.basename(urlparse(self.url).path))
    if sys.version_info < (3, 0):
        fn = fn.decode('utf-8')  # required only on python 2
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        # dest was given as a directory (trailing separator): remove a
        # conflicting *file* of that name, then append the filename.
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar

    # Logger precedence: explicit logger > shared debugging logger (created
    # once per process, tracked via DEFAULT_LOGGER_CREATED) > no-op logger.
    if logger:
        self.logger = logger
    elif connect_default_logger:
        if not DEFAULT_LOGGER_CREATED:
            self.logger = utils.create_debugging_logger()
            DEFAULT_LOGGER_CREATED = True
        else:
            self.logger = logging.getLogger('pySmartDL')
    else:
        self.logger = utils.DummyLogger()

    self.headers = {'User-Agent': utils.get_random_useragent()}
    self.threads_count = threads
    self.timeout = 4
    self.current_attemp = 1   # [sic] spelling kept — attribute names are public
    self.attemps_limit = 4
    self.minChunkFile = 1024**2 * 2 # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(
        c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []
    self.post_threadpool_thread = None
    self.control_thread = None

    # Ensure the destination folder exists.
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.info('Folder "%s" does not exist. Creating...'
                         % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    # Multi-threaded ranges require server-side HTTPRange support.
    if not utils.is_HTTPRange_supported(self.url):
        self.logger.warning(
            "Server does not support HTTPRange. threads_count is set to 1."
        )
        self.threads_count = 1

    if os.path.exists(self.dest):
        self.logger.warning(
            'Destination "%s" already exists. Existing file will be removed.'
            % self.dest)
    # NOTE(review): the destination directory was already created above, so
    # this re-check can only fire if it vanished meanwhile — likely dead code.
    if not os.path.exists(os.path.dirname(self.dest)):
        self.logger.warning(
            'Directory "%s" does not exist. Creating it...'
            % os.path.dirname(self.dest))
        os.makedirs(os.path.dirname(self.dest))

    self.logger.info("Creating a ThreadPool of %d thread(s).",
                     self.threads_count)
    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)