def update_courses(self, max_retries=5, timeout=20):
    """Extract courses from the index page.

    :return: list of Course objects
    """
    session = requests.Session()
    retry_adapter = HTTPAdapter(max_retries=max_retries)
    session.mount(self._course_list_url, retry_adapter)
    html = session.get(self._course_list_url).text
    links, names = self._course_link_filter(html)
    links = list(
        map(lambda l: parse.urljoin(self._course_list_url, l), links))

    session = FuturesSession()
    # we assume the individual courses have the same prefix as the course
    # list
    parsed_url = parse.urlparse(self._course_list_url)
    prefix = parsed_url.scheme + '://' + parsed_url.netloc
    session.mount(prefix, retry_adapter)

    futures = []
    for i, l in enumerate(links):
        print('Processing {} ({} of {})'.format(l, i + 1, len(links)))
        futures.append((l, session.get(l, timeout=timeout)))

    self._courses = [
        course
        for lst in (self._course_filter(f.result().text, page_url=l)
                    for l, f in futures if not f.exception())
        for course in lst
    ]
def AsyncSession(
    retries: int = 10,
    backoff_factor: float = 0.3,
    allowed_methods: Iterable[str] = ('HEAD', 'TRACE', 'GET', 'POST',
                                      'PUT', 'OPTIONS', 'DELETE'),
    status_forcelist: Iterable[int] = (408, 429, 500, 502, 503, 504),
) -> FuturesSession:
    """Return a FuturesSession object with full retry capabilities.

    Args:
        retries (int): number of retries
        backoff_factor (float): speed factor for retries (in seconds)
        allowed_methods (iterable): http methods to retry on
        status_forcelist (iterable): http status codes to retry on

    Returns:
        :py:class:`requests_futures.sessions.FuturesSession`: session object
    """
    session = FuturesSession()
    retry = Retry(
        total=retries,
        connect=retries,
        read=retries,
        redirect=retries,
        # status=retries,
        allowed_methods=allowed_methods,
        status_forcelist=status_forcelist,
        backoff_factor=backoff_factor,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
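# Usage sketch (not from the original project): a retry-enabled FuturesSession
# returned by AsyncSession() is typically driven by firing several requests at
# once and then resolving the futures. The URLs and values below are placeholders.
session = AsyncSession(retries=3, backoff_factor=0.5)

urls = ['https://example.com/a', 'https://example.com/b']
futures = [session.get(url, timeout=10) for url in urls]  # returns immediately

for future in futures:
    response = future.result()  # blocks until the request (and any retries) completes
    print(response.status_code, response.url)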
def _init_session(session, **kwargs):
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
        retries = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"])
        session.mount(
            'https://',
            TimeoutHTTPAdapter(max_retries=retries,
                               timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        session.headers.update({
            "User-Agent": random.choice(USER_AGENT_LIST)
        })
    return session
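# Several of these snippets mount a TimeoutHTTPAdapter without defining it.
# A minimal sketch of that helper, assuming the common community pattern of an
# HTTPAdapter subclass that injects a default timeout (DEFAULT_TIMEOUT here is
# an assumed constant, not taken from the original source):
from requests.adapters import HTTPAdapter

DEFAULT_TIMEOUT = 5  # seconds (assumed default)


class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that applies a default timeout to every request."""

    def __init__(self, *args, timeout=DEFAULT_TIMEOUT, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        # Only inject the default when the caller did not set a timeout.
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)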
def get_events_from_icalendars():
    global now, midnight
    now = localtz.localize(datetime.datetime.now())
    midnight = localtz.localize(
        datetime.datetime.combine(now, datetime.time(0, 0, 0)))

    cz = Calzone()

    session = FuturesSession()
    session.mount('https://',
                  CacheControlAdapter(cache=FileCache('.webcache'),
                                      heuristic=ForceCacheHeuristic()))
    cals = {k: session.get(u) for k, u in calendars.items()}
    concurrent.futures.wait(cals.values())

    for k, req in cals.items():
        try:
            cz.load(req.result().text)
        except Exception as err:
            print("Failed to load calendar '{}'".format(k))
            print(err)

    events = []
    try:
        events = cz.get_events(midnight, midnight + datetime.timedelta(days=90))
    except Exception as e:
        print(e)
    events.sort(key=lambda e: e.start)
    return events
class APNsClient(object):

    def __init__(self, cert_file, use_sandbox=False, use_alternative_port=False):
        server = 'api.development.push.apple.com' if use_sandbox else 'api.push.apple.com'
        port = 2197 if use_alternative_port else 443
        self.cert = cert_file
        self.server = "https://{}:{}".format(server, port)
        self.__connection = FuturesSession()
        self.__connection.mount('https://', HTTP20Adapter())

    def send_notification(self, tokens, notification,
                          priority=NotificationPriority.Immediate, topic=None):
        # print(notification.dict())
        json_payload = json.dumps(
            notification.dict(),
            ensure_ascii=False,
            separators=(',', ':')
        ).encode('utf-8')

        headers = {
            'apns-priority': priority.value
        }
        if topic:
            headers['apns-topic'] = topic

        if not isinstance(tokens, list):
            tokens = [tokens]

        for token in tokens:
            url = '{}/3/device/{}'.format(self.server, token)
            self.__connection.post(url,
                                   json_payload,
                                   headers=headers,
                                   cert=self.cert,
                                   background_callback=req_callback)
def _init_session(session=None, **kwargs):
    session_headers = headers
    if session is None:
        if kwargs.get("asynchronous"):
            session = FuturesSession(max_workers=kwargs.get("max_workers", 8))
        else:
            session = Session()
        if kwargs.get("proxies"):
            session.proxies = kwargs.get("proxies")
        retries = Retry(
            total=kwargs.get("retry", 5),
            backoff_factor=kwargs.get("backoff_factor", 0.3),
            status_forcelist=kwargs.get("status_forcelist", [429, 500, 502, 503, 504]),
            method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"],
        )
        if kwargs.get("verify"):
            session.verify = kwargs.get("verify")
        session.mount(
            "https://",
            TimeoutHTTPAdapter(max_retries=retries,
                               timeout=kwargs.get("timeout", DEFAULT_TIMEOUT)),
        )
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        user_agent = kwargs.get("user_agent", random.choice(USER_AGENT_LIST))
        session_headers["User-Agent"] = user_agent
        if kwargs.get("headers") and isinstance(kwargs.get("headers"), dict):
            session_headers.update(**kwargs.get("headers"))
        session.headers.update(**session_headers)
    return session
def _get_raw_data(self):
    docktor_config = providers_config.providers['docktor']
    apps = []
    session = FuturesSession(max_workers=CONCURRENT_JOBS_LIMIT)
    session.mount('https://', self.__requests_http_adapter)
    session.mount('http://', self.__requests_http_adapter)
    for stage in docktor_config:
        for zone in docktor_config[stage]:
            apps_uri = '{uri}/apps/'.format(uri=docktor_config[stage][zone]['uri'])
            try:
                r = session.get(apps_uri, timeout=REQUEST_TIMEOUT).result()
                r.raise_for_status()
                apps_list = r.json()
            except ValueError as e:
                logger.error("Non json response {} from {}-{} docktor".format(r.content, stage, zone))
                raise e
            except Exception as e:
                logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                raise e

            future_apps_details = [
                session.get('{apps_uri}{app}'.format(apps_uri=apps_uri, app=app),
                            timeout=REQUEST_TIMEOUT)
                for app in apps_list
            ]

            try:
                apps_details = [a.result() for a in future_apps_details]
            except Exception as e:
                logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                raise e

            partial_get_app_info = partial(self.get_app_info, stage, zone)

            apps.extend(map(lambda a: partial_get_app_info(a), apps_details))
    return apps
def get_async_requests_session(num_retries, backoff_factor, pool_size,
                               status_forcelist=None):
    # Use requests & urllib3 to auto-retry.
    # If the backoff_factor is 0.1, then sleep() will sleep for [0.1s, 0.2s,
    # 0.4s, ...] between retries. It will also force a retry if the status
    # code returned is in status_forcelist.
    if status_forcelist is None:
        status_forcelist = [500, 502, 503, 504]
    session = FuturesSession(max_workers=pool_size)

    # If any regular response is generated, no retry is done. Without using
    # the status_forcelist, even a response with status 500 will not be
    # retried.
    retries = Retry(total=num_retries, backoff_factor=backoff_factor,
                    status_forcelist=status_forcelist)

    # Mount handler on both HTTP & HTTPS.
    session.mount('http://', HTTPAdapter(max_retries=retries,
                                         pool_connections=pool_size,
                                         pool_maxsize=pool_size))
    session.mount('https://', HTTPAdapter(max_retries=retries,
                                          pool_connections=pool_size,
                                          pool_maxsize=pool_size))

    return session
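# Illustrative usage (values and URLs are placeholders, not from the original
# project): the pool size matches the number of concurrent workers, and the
# same session is reused for many requests so connections are pooled.
session = get_async_requests_session(num_retries=3, backoff_factor=0.25, pool_size=25)

futures = [session.get('https://example.com/item/{}'.format(i), timeout=10)
           for i in range(100)]
responses = [f.result() for f in futures]  # result() re-raises connection errors
ok = sum(1 for r in responses if r.status_code == 200)
print('{} of {} requests succeeded'.format(ok, len(responses)))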
def compile_data(
    api_url,
    last_audit_year,
    last_opinion_year,
    last_uifw_year,
    last_audit_quarter,
):
    # Setup the client
    http_client = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
    http_client.mount(
        f'{urlparse(api_url).scheme}://',
        HTTPAdapter(max_retries=Retry(total=5,
                                      backoff_factor=1,
                                      status_forcelist=[500])),
    )

    def get(url, params):
        return http_client.get(url, params=params, verify=False)

    api_client = ApiClient(get, api_url)

    # Compile data
    compile_profiles(
        api_client,
        last_audit_year,
        last_opinion_year,
        last_uifw_year,
        last_audit_quarter,
    )
    compile_medians(api_client)
    compile_rating_counts(api_client)
class Scraper(object):
    def __init__(self, url):
        self.url = url
        self.session = FuturesSession(max_workers=100)
        adapter = requests.adapters.HTTPAdapter(pool_connections=100,
                                                pool_maxsize=100)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)

    def _extract_image_urls(self, soup):
        for img in soup.findAll("img", src=True):
            yield urljoin(self.url, img["src"])

    def _find_thumbnail_image(self):
        content_type, content = _fetch_url(self.url, session=self.session)
        soup = BeautifulSoup(content)
        image_urls = self._extract_image_urls(soup)
        image_urls = [u for u in image_urls]  # turn to list
        image_urls = list(set(image_urls))  # lose duplicates
        image_sizes = _parallel_get_sizes(image_urls, self.session)
        logging.debug('got sizes for {} images'.format(len(image_sizes)))
        # find biggest
        max_area = 0
        max_url = None
        for image_url in image_urls:
            size = image_sizes[image_url]
            if not size:
                continue

            # ignore little images
            area = size[0] * size[1]
            if area < 5000:
                logging.debug('ignore little {}'.format(image_url))
                continue

            # ignore excessively long/wide images
            if max(size) / min(size) > 1.5:
                logging.debug('ignore dimensions {}'.format(image_url))
                continue

            # penalize images with "sprite" in their name
            if 'sprite' in image_url.lower():
                logging.debug('penalizing sprite {}'.format(image_url))
                area /= 10

            if area > max_area:
                max_area = area
                max_url = image_url
        return max_url

    def scrape(self):
        thumbnail_url = self._find_thumbnail_image()
        # thumbnail = _make_thumbnail_from_url(thumbnail_url, referer=self.url)
        return thumbnail_url
def make(cls, future=False, timeout=5, max_workers=10):
    if future:
        executor = ThreadPoolExecutor(max_workers=max_workers)
        session = FuturesSession(executor=executor)
    else:
        session = StandardSession()
    adapter = TimeoutHTTPAdapter(timeout=timeout)
    session.mount(prefix="http://", adapter=adapter)
    session.mount(prefix="https://", adapter=adapter)
    return session
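# Hypothetical usage of the factory above; SessionFactory stands in for the
# (unshown) class that owns make(). With future=True the session returns
# futures, otherwise it behaves like a plain blocking requests.Session.
async_session = SessionFactory.make(future=True, timeout=10, max_workers=4)
future = async_session.get('https://example.com/ping')  # non-blocking
print(future.result().status_code)

blocking_session = SessionFactory.make()
print(blocking_session.get('https://example.com/ping').status_code)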
def _init_session(session, **kwargs):
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
        retries = \
            Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount('http://', HTTPAdapter(max_retries=retries))
    return session
class CrashInfo(object):
    # TODO: count is probably erroneous since there is a range by default in supersearch...

    CRASH_STATS_URL = 'https://crash-stats.mozilla.com'
    SUPERSEARCH_URL = CRASH_STATS_URL + '/api/SuperSearch'
    TIMEOUT = 5
    MAX_RETRIES = 5
    MAX_WORKERS = multiprocessing.cpu_count()

    def __init__(self, paths, credentials = None):
        self.results = [ ]
        self.credentials = credentials
        self.info = { }
        self.paths = [paths] if type(paths) == str else paths
        for path in self.paths:
            self.info[path] = { 'crashes': -1 }
        self.session = FuturesSession(max_workers = self.MAX_WORKERS)
        self.session.mount(self.CRASH_STATS_URL,
                           HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        for r in self.results:
            r.result()
        return self.info

    def __get_apikey(self):
        if self.credentials:
            return self.credentials['tokens'][self.CRASH_STATS_URL]
        else:
            return ''

    def __info_cb(self, path):
        def cb(sess, res):
            self.info[path]['crashes'] = res.json()['total']
        return cb

    def __get_info(self):
        header = { 'Auth-Token': self.__get_apikey() }
        for path in self.paths:
            filename = os.path.basename(path)
            self.results.append(self.session.get(self.SUPERSEARCH_URL,
                                                 params = { 'product': 'Firefox',
                                                            'topmost_filenames': filename,
                                                            '_results_number': 0,
                                                            '_facets': 'product',
                                                            '_facets_size': 1 },
                                                 headers = header,
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb(path)))
def async_retryable_session(executor: ThreadPoolExecutor) -> FuturesSession:
    session = FuturesSession(executor)
    retries = 3
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=0.5,
        status_forcelist=(SC_TOO_MANY_REQUESTS, ),
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
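# Sketch of how async_retryable_session() might be driven. SC_TOO_MANY_REQUESTS
# is assumed to be HTTP 429; the executor sizing and URL are illustrative only.
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=8)
session = async_retryable_session(executor)

# A 429 response is retried up to 3 times with exponential backoff before the
# future resolves.
future = session.get('https://example.com/rate-limited-endpoint')
response = future.result()
executor.shutdown(wait=True)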
class FXRevision(object):

    ARCHIVES_URL = 'http://archive.mozilla.org'
    NIGHTLY_URL = ARCHIVES_URL + '/pub/firefox/nightly/'
    TIMEOUT = 5
    MAX_RETRIES = 5

    def __init__(self, versions, fx_version, os):
        self.results = [ ]
        self.dates = { }
        self.fx_version = fx_version
        self.os = os
        self.info = { }
        pattern = re.compile('([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})')
        for version in versions:
            m = pattern.search(version)
            self.dates[version] = [m.group(i) for i in range(1, 7)]
        self.session = FuturesSession()
        self.session.mount(self.ARCHIVES_URL,
                           HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        for r in self.results:
            r.result()
        return self.info

    def __make_url(self, l):
        return self.NIGHTLY_URL + l[0] + '/' + l[1] + '/' + '-'.join(l) + \
            '-mozilla-central/firefox-' + self.fx_version + '.en-US.' + self.os + '.json'

    def __info_cb(self, sess, res):
        json = res.json()
        self.info[json['buildid']] = json['moz_source_stamp']

    def __get_info(self):
        for date in self.dates.itervalues():
            self.results.append(self.session.get(self.__make_url(date),
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb))
def concurrent_post(self, parents: list,
                    threads=CONCURRENT_POST_DEFAULT_THREADS,
                    raise_http_error=False, log=False) -> list:
    request_list = []
    session = FuturesSession(executor=ThreadPoolExecutor(
        max_workers=threads))
    session.mount('http://', HTTPAdapter(max_retries=self.RETRIES_CONFIG))
    for parent in parents:
        processed_parent = self.remove_empty_fields(parent)
        item_type = processed_parent['item_type']
        future_request = session.post(
            f'{self.BASE_URL}{POST_ENDPOINTS[item_type]}',
            **self._prepare_post_kwargs(processed_parent))
        future_request.parent = processed_parent
        request_list.append(future_request)

    post_results = []
    estimator = Estimate()
    for index, completed_request in enumerate(as_completed(request_list)):
        res = completed_request.result()
        if log:
            progress, eta = estimator.get(index + 1, len(request_list))
            log_message = (f'POST {res.status_code} '
                           f'variant({completed_request.parent["rid"]}) '
                           f'processed {index+1}/{len(request_list)}({progress}%) '
                           f'requests ETA {eta}')
            if res.status_code < 400:
                self.logger.info(log_message)
            else:
                self.logger.error(log_message)
        post_results.append(
            self._handle_post_response(
                res, completed_request.parent,
                completed_request.parent['item_type'], raise_http_error))
    return post_results
def _init_session(session, **kwargs):
    session_headers = headers
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
        retries = Retry(
            total=kwargs.get('retry', 5),
            backoff_factor=kwargs.get('backoff_factor', .3),
            status_forcelist=kwargs.get('status_forcelist',
                                        [429, 500, 502, 503, 504]),
            method_whitelist=['HEAD', 'GET', 'OPTIONS', 'POST', 'TRACE'])
        if kwargs.get('verify'):
            session.verify = kwargs.get('verify')
        session.mount(
            'https://',
            TimeoutHTTPAdapter(max_retries=retries,
                               timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        user_agent = kwargs.get('user_agent', random.choice(USER_AGENT_LIST))
        # if kwargs.get('cookies'):
        #     cookies = get_cookies(user_agent)
        #     [session.cookies.set(c['name'], c['value']) for c in cookies]
        #     session_headers.update({
        #         'cookie': '; '.join([
        #             item['name'] + "=" + item['value'] for item in cookies
        #         ])
        #     })
        session_headers['User-Agent'] = user_agent
        session.headers.update(**session_headers)
    return session
class Backtrace(object): CRASH_STATS_URL = 'https://crash-stats.mozilla.com' PROCESSED_URL = CRASH_STATS_URL + '/api/ProcessedCrash/' TIMEOUT = 5 MAX_RETRIES = 5 MAX_WORKERS = multiprocessing.cpu_count() def __init__(self, uuids, fraction = 0.2, just_hg = False, credentials = None): self.just_hg = just_hg self.results = [ ] self.credentials = credentials self.uuids = uuids self.fraction = max(0., min(fraction, 1.)) self.info = { } self.session = FuturesSession(max_workers = self.MAX_WORKERS) self.session.mount(self.CRASH_STATS_URL, HTTPAdapter(max_retries = self.MAX_RETRIES)) self.__get_info() def get(self): for r in self.results: r.result() return self.info def __get_apikey(self): if self.credentials: return self.credentials['tokens'][self.CRASH_STATS_URL] else: return '' @staticmethod def __cycles_detection(funs): # TODO: improve this algorithm (not sure that's a good one) positions = { } # we get the function positions in the trace for i in range(len(funs)): fun = funs[i] if fun in positions: positions[fun].append(i) else: positions[fun] = [ i ] lengths = { } for k, v in positions.iteritems(): if len(v) >= 2: l = v[1] - v[0] good = True for i in range(2, len(v)): if v[i] - v[i - 1] != l: good = False break if good: if l in lengths: lengths[l].append((k, v)) else: lengths[l] = [ (k, v) ] cycles = [ ] for k, v in lengths.iteritems(): l = sorted(v, cmp = lambda x, y: cmp(x[1][0], y[1][0])) pat = [ ] container = [ l[0][0] ] pos = l[0][1][0] for i in range(1, len(l)): _pos = l[i][1][0] if _pos == pos + 1: container.append(l[i][0]) pos = _pos else: pat.append(tuple(container)) container = [ l[i][0] ] pos = _pos pat.append(tuple(container)) cycles += pat cycles = tuple(cycles) return cycles def __info_cb(self, sess, res): json = res.json() if 'json_dump' in json: uuid = json['uuid'] jd = json['json_dump'] if 'crashedThread' in json and 'threads' in jd: ct = json['crashedThread'] ct = jd['threads'][ct] self.info[uuid] = { 'cycles': [ ], 'address': '', 'cpu_name': json['cpu_name'], 'cpu_info': json['cpu_info'], 'reason': json['reason'], 'os': json['os_pretty_version'] } if 'frames' in ct: frames = ct['frames'] functions = [ ] # we get the functions in the backtrace (to check if there is a recursion) for frame in frames: if 'function' in frame: functions.append(frame['function']) # check for duplicated entries in function self.info[uuid]['cycles'] = Backtrace.__cycles_detection(functions) if 'crash_info' in jd: addr = jd['crash_info']['address'] self.info[uuid]['address'] = addr def __hginfo_cb(self, sess, res): json = res.json() if 'json_dump' in json: uuid = json['uuid'] jd = json['json_dump'] if 'crashedThread' in json and 'threads' in jd: ct = json['crashedThread'] ct = jd['threads'][ct] self.info[uuid] = { 'hgfiles': [ ] } if 'frames' in ct: frames = ct['frames'] files = [ ] # _files is just used to avoid duplicated in files _files = set() for frame in frames: if 'file' in frame: f = frame['file'] if f not in _files: files.append(f) _files.add(f) self.info[uuid] = files def __get_info(self): header = { 'Auth-Token': self.__get_apikey() } if self.just_hg: self.results.append(self.session.get(self.PROCESSED_URL, params = { 'crash_id': self.uuids[0] }, headers = header, timeout = self.TIMEOUT, background_callback = self.__hginfo_cb)) return if self.fraction != 1: L = len(self.uuids) indices = set() ratio = self.fraction if self.fraction <= 0.5 else 1 - self.fraction N = int(float(L) * ratio) # we analyze only a fraction of all the uuids while len(indices) != N: indices.add(randint(0, L - 1)) if 
self.fraction <= 0.5: uuids = [self.uuids[i] for i in indices] else: uuids = [ ] for i in range(L): if i not in indices: uuids.append(self.uuids[i]) else: uuids = self.uuids for uuid in uuids: self.results.append(self.session.get(self.PROCESSED_URL, params = { 'crash_id': uuid }, headers = header, timeout = self.TIMEOUT, background_callback = self.__info_cb))
def addManyMementos(self, urims): module_logger.info("started with {} URI-Ms for processing...".format(len(urims))) # protect the function from duplicates in the urims list urims = list(set(urims)) module_logger.info("found duplicates, now using {} URI-Ms for processing...".format(len(urims))) futuressession = FuturesSession(session=self.session) retry = Retry( total=10, read=10, connect=10, backoff_factor=0.3, status_forcelist=(500, 502, 504) ) adapter = HTTPAdapter(max_retries=retry) futuressession.mount('http://', adapter) futuressession.mount('https://', adapter) futures = {} raw_futures = {} working_urim_list = [] raw_urims = [] for uri in urims: # raw_urim = otmt.generate_raw_urim(uri) working_urim_list.append(uri) futures[uri] = futuressession.get(uri) # futures[raw_urim] = futuressession.get(raw_urim) working_starting_size = len(working_urim_list) def uri_generator(urilist): while len(urilist) > 0: uchoice = random.choice(urilist) yield uchoice for uri in uri_generator(working_urim_list): if futures[uri].done(): module_logger.debug("URI-M {} is done, processing...".format(uri)) if len(working_urim_list) % 100 == 0: module_logger.info("{}/{} mementos left to process".format(len(working_urim_list), working_starting_size)) try: r = futures[uri].result() if len(r.history) == 0: raw_urim = otmt.generate_raw_urim(uri) else: raw_urim = otmt.generate_raw_urim(r.url) raw_urims.append( raw_urim ) if 'memento-datetime' not in r.headers: self.addMementoError(uri, "URI-M {} does not produce a memento".format(uri)) else: # the content should be cached by the session # we just need to keep track of the URI-Ms for this run self.urimlist.append(uri) except Exception as e: self.addMementoError(uri, repr(e)) working_urim_list.remove(uri) del futures[uri] module_logger.info("done adding {} mementos, now adding corresponding {} raw mementos...".format( len(urims), len(raw_urims) )) working_raw_urim_list = [] for raw_urim in list(set(raw_urims)): working_raw_urim_list.append(raw_urim) raw_futures[raw_urim] = futuressession.get(raw_urim) working_rawurims_starting_size = len(working_raw_urim_list) # for raw_urim in uri_generator(working_raw_urim_list): while len(working_raw_urim_list) > 0: raw_urim = random.choice(working_raw_urim_list) module_logger.debug("fetching results for raw URI-M {}".format(raw_urim)) # module_logger.debug("are the keys the same as the working list: {}".format( set(working_raw_urim_list) == set(list(raw_futures.keys())) ) ) module_logger.debug("raw mementos working list size: {}".format(len(working_raw_urim_list))) module_logger.debug("raw mementos futures keys size: {}".format(len(raw_futures))) # try: # raw_futures[raw_urim] # except KeyError: # module_logger.error("{} is not in futures".format(raw_urim)) # module_logger.error("is it: {}".format( raw_urim in raw_futures )) # module_logger.error("") # module_logger.error("working list follows:") # module_logger.error(pp.pformat(working_raw_urim_list)) # module_logger.error("") # module_logger.error("raw_futures keys follows:") # module_logger.error(pp.pformat(list(raw_futures.keys()))) if raw_futures[raw_urim].done(): module_logger.debug("raw URI-M {} is done, processing...".format(raw_urim)) if len(working_raw_urim_list) % 100 == 0: module_logger.info("{}/{} raw mementos left to process".format(len(working_raw_urim_list), working_rawurims_starting_size)) try: r = raw_futures[raw_urim].result() if 'memento-datetime' not in r.headers: self.addMementoError(uri, "raw URI-M {} does not produce a memento".format(raw_urim)) else: # the 
content should be cached by the session # we just need to keep track of the raw URI-Ms for this run self.urimlist.append(raw_urim) except Exception as e: self.addMementoError(raw_urim, repr(e)) # module_logger.debug("removing {} from working raw URI-M list and raw futures keys".format(raw_urim)) working_raw_urim_list.remove(raw_urim) del raw_futures[raw_urim] # module_logger.debug("raw URI-M {} in working raw URI-M list still? {}".format( raw_urim, raw_urim in working_raw_urim_list )) time.sleep(1)
class Connection(object): """Represents a connection to a server """ TIMEOUT = 30 MAX_RETRIES = 256 MAX_WORKERS = multiprocessing.cpu_count() CHUNK_SIZE = 32 TOKEN = '' USER_AGENT = config.get('User-Agent', 'name', 'libmozdata') X_FORWARDED_FOR = utils.get_x_fwed_for_str( config.get('X-Forwarded-For', 'data', '')) # Error 429 is for 'Too many requests' => we retry STATUS_FORCELIST = [429] def __init__(self, base_url, queries=None, **kwargs): """Constructor Args: base_url (str): the server's url queries (Optional[Query]): the queries """ self.session = FuturesSession(max_workers=self.MAX_WORKERS) retries = Retry(total=Connection.MAX_RETRIES, backoff_factor=1, status_forcelist=Connection.STATUS_FORCELIST) self.session.mount(base_url, HTTPAdapter(max_retries=retries)) self.results = [] self.queries = queries if kwargs: if 'timeout' in kwargs: self.TIMEOUT = kwargs['timeout'] if 'max_retries' in kwargs: self.MAX_RETRIES = kwargs['max_retries'] if 'max_workers' in kwargs: self.MAX_WORKERS = kwargs['max_workers'] if 'user_agent' in kwargs: self.USER_AGENT = kwargs['user_agent'] if 'x_forwarded_for' in kwargs: self.X_FORWARDED_FOR = utils.get_x_fwded_for_str( kwargs['x_forwarded_for']) self.exec_queries() def __get_cb(self, query): """Get the callback to use when data have been retrieved Args: query (Query): the query Returns: function: the callback for the query """ def cb(sess, res): if res.status_code == 200: try: response = res.json() except: response = res.text if query.handlerdata is not None: query.handler(response, query.handlerdata) else: query.handler(response) else: print('Connection error:') print(' url: ', res.url) print(' text: ', res.text) return cb def wait(self): """Just wait that all the queries have been treated """ for r in self.results: r.result() def get_apikey(self): """Get the api key Returns: str: the api key """ return self.TOKEN def get_header(self): """Get the header to use each query Returns: dict: the header """ if self.X_FORWARDED_FOR: return { 'User-Agent': self.USER_AGENT, 'X-Forwarded-For': self.X_FORWARDED_FOR, 'Connection': 'close' } else: return {'User-Agent': self.USER_AGENT, 'Connection': 'close'} def get_auth(self): """Get the auth to use each query Returns: dict: the auth """ return None def exec_queries(self, queries=None): """Set and exec some queries Args: queries (Optional[Query]): the queries to exec """ if queries: self.queries = queries if self.queries: if isinstance(self.queries, Query): self.queries = [self.queries] header = self.get_header() auth = self.get_auth() for query in self.queries: cb = self.__get_cb(query) if query.params: if isinstance(query.params, dict): self.results.append( self.session.get(query.url, params=query.params, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) else: for p in query.params: self.results.append( self.session.get(query.url, params=p, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) else: self.results.append( self.session.get(query.url, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) @staticmethod def chunks(l, chunk_size=CHUNK_SIZE): """Get chunk from a list Args: l (List): data to chunkify chunk_size (Optional[int]): the size of each chunk Yields: a chunk from the data """ for i in range(0, len(l), chunk_size): yield l[i:(i + chunk_size)]
class HTTPDriver(BaseDriver): """HTTPDriver The :class:`HTTPDriver` class reads SBP messages from an HTTP service for a device and writes out to a stream. This driver is like a file-handle with read and writes over two separately HTTP connections, but can also be enabled and disabled by its consumer. Parameters ---------- device_uid : uid Device unique id url : str HTTP endpoint retries : tuple Configure connect and read retry count. Defaults to (MAX_CONNECT_RETRIES, MAX_READ_RETRIES). timeout : tuple Configure connect and read timeouts. Defaults to (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT). """ def __init__( self, device_uid=None, url="https://broker.staging.skylark.swiftnav.com", retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT, ): self._retry = Retry(connect=DEFAULT_RETRIES[0], read=DEFAULT_RETRIES[1], redirect=MAX_REDIRECTS, status_forcelist=[500], backoff_factor=DEFAULT_BACKOFF_FACTOR) self.url = url self.read_session = requests.Session() self.read_session.mount( "http://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.read_session.mount( "https://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.write_session = None self.device_uid = device_uid self.timeout = timeout self.read_response = None self.write_response = None self.source = None def flush(self): """File-flush wrapper (noop). """ pass def close(self): """File-handle close wrapper (noop). """ try: self.read_close() self.write_close() except: pass @property def write_ok(self): """ Are we connected for writes? """ # Note that self.write_response is either None or a Response # object, which cast to False for 4xx and 5xx HTTP codes. return bool(self.write_response) def connect_write(self, source, whitelist, device_uid=None, pragma=None): """Initialize a streaming write HTTP response. Manually connects the underlying file-handle. In the event of a network disconnection, use to manually reinitiate an HTTP session. Parameters ---------- source : sbp.client.handler.Handler Iterable source of SBP messages. 
whitelist : [int] Whitelist of messages to write """ header_device_uid = device_uid or self.device_uid headers = { 'Device-Uid': header_device_uid, 'Content-Type': BROKER_SBP_TYPE, 'Pragma': pragma } if not pragma: del headers['Pragma'] try: self.executor = ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE) self.write_session = FuturesSession(executor=self.executor) self.write_session.mount( "http://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.write_session.mount( "https://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.source = source.filter(whitelist) gen = (msg.pack() for msg, _ in self.source) self.write_session.put(self.url, data=gen, headers=headers) self.write_response = True except requests.exceptions.ConnectionError: msg = "Client connection error to %s with [PUT] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.ConnectTimeout: msg = "Client connection timeout to %s with [PUT] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.RetryError: msg = "Client retry error to %s with [PUT] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.ReadTimeout: msg = "Client read timeout to %s with [PUT] headers %s" \ % (self.url, headers) warnings.warn(msg) return self.write_ok def write(self, data): """Write wrapper (noop). Actual stream is initiated by the write connection. Parameters ---------- data : object Data to write. """ pass def write_close(self): """File-handle close wrapper (noop). """ try: self.write_session.close() self.executor.shutdown(wait=False) self.source.breakiter() self.source = None self.executor = None self.write_session = None except: pass @property def read_ok(self): """ Are we connected for reads? """ return bool(self.read_response) def connect_read(self, device_uid=None, pragma=None): """Initialize a streaming read/write HTTP response. Manually connects the underlying file-handle. In the event of a network disconnection, use to manually reinitiate an HTTP session. """ header_device_uid = device_uid or self.device_uid headers = { 'Device-Uid': header_device_uid, 'Accept': BROKER_SBP_TYPE, 'Pragma': pragma } if not pragma: del headers['Pragma'] try: self.read_response = self.read_session.get(self.url, stream=True, headers=headers, timeout=self.timeout) except requests.exceptions.ConnectionError: msg = "Client connection error to %s with [GET] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.ConnectTimeout: msg = "Client connection timeout to %s with [GET] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.RetryError: msg = "Client retry error to %s with [GET] headers %s" \ % (self.url, headers) warnings.warn(msg) except requests.exceptions.ReadTimeout: msg = "Client read timeout to %s with [GET] headers %s" \ % (self.url, headers) warnings.warn(msg) return self.read_ok def read(self, size): """Read wrapper. If the client connection is closed or some other exception is thrown, raises an IOError. Parameters ---------- size : int Size to read (in bytes). 
Returns ---------- bytearray, or None """ if self.read_response is None or not self.device_uid: raise ValueError("Invalid/insufficient HTTP request parameters!") elif not self.read_ok or self.read_response.raw.closed: raise IOError("HTTP read closed?!") try: return self.read_response.raw.read(size) except: raise IOError("HTTP read error!") def read_close(self): """File-handle close wrapper (noop). """ try: self.read_response.close() self.read_response = None except: pass
class Connection(object): """Represents a connection to a server """ TIMEOUT = 30 MAX_RETRIES = 256 MAX_WORKERS = multiprocessing.cpu_count() CHUNK_SIZE = 32 TOKEN = '' # Error 429 is for 'Too many requests' => we retry STATUS_FORCELIST = [429] def __init__(self, base_url, queries=None, **kwargs): """Constructor Args: base_url (str): the server's url queries (Optional[Query]): the queries """ self.session = FuturesSession(max_workers=self.MAX_WORKERS) retries = Retry(total=Connection.MAX_RETRIES, backoff_factor=1, status_forcelist=Connection.STATUS_FORCELIST) self.session.mount(base_url, HTTPAdapter(max_retries=retries)) self.results = [] self.queries = queries if kwargs: if 'timeout' in kwargs: self.TIMEOUT = kwargs['timeout'] if 'max_retries' in kwargs: self.MAX_RETRIES = kwargs['max_retries'] if 'max_workers' in kwargs: self.MAX_WORKERS = kwargs['max_workers'] self.exec_queries() def __get_cb(self, query): """Get the callback to use when data have been retrieved Args: query (Query): the query Returns: function: the callback for the query """ def cb(sess, res): if res.status_code == 200: try: response = res.json() except: response = res.text if query.handlerdata is not None: query.handler(response, query.handlerdata) else: query.handler(response) else: print('Connection error:') print(' url: ', res.url) print(' text: ', res.text) return cb def wait(self): """Just wait that all the queries have been treated """ for r in self.results: r.result() def get_apikey(self): """Get the api key Returns: str: the api key """ return self.TOKEN def get_header(self): """Get the header to use each query Returns: dict: the header """ return {'User-Agent': 'clouseau', 'Connection': 'close'} def get_auth(self): """Get the auth to use each query Returns: dict: the auth """ return None def exec_queries(self, queries=None): """Set and exec some queries Args: queries (Optional[Query]): the queries to exec """ if queries: self.queries = queries if self.queries: if isinstance(self.queries, Query): self.queries = [self.queries] header = self.get_header() auth = self.get_auth() for query in self.queries: cb = self.__get_cb(query) if query.params: if isinstance(query.params, dict): self.results.append(self.session.get(query.url, params=query.params, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) else: for p in query.params: self.results.append(self.session.get(query.url, params=p, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) else: self.results.append(self.session.get(query.url, headers=header, auth=auth, verify=True, timeout=self.TIMEOUT, background_callback=cb)) @staticmethod def chunks(l, chunk_size=CHUNK_SIZE): """Get chunk from a list Args: l (List): data to chunkify chunk_size (Optional[int]): the size of each chunk Yields: a chunk from the data """ for i in range(0, len(l), chunk_size): yield l[i:(i + chunk_size)]
        day.update({'type_id': resp.type_id, 'region_id': resp.region_id})
        writer.writerow(day)
    except Exception as e:
        logger.exception(e)


if __name__ == '__main__':
    session = FuturesSession(max_workers=50)
    session.headers.update({'UserAgent': 'Fuzzwork Market Monitor'})
    # https://stackoverflow.com/questions/40417503/applying-retry-on-grequests-in-python
    retries = Retry(total=50, backoff_factor=0.5,
                    status_forcelist=[500, 502, 503, 504],
                    raise_on_redirect=True, raise_on_status=True)
    session.mount('http://', HTTPAdapter(max_retries=retries, pool_maxsize=50))
    session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=50))

    regions = pd.read_csv('data/mapRegions.csv.bz2')
    types = pd.read_csv('data/invTypes.csv.bz2',
                        usecols=['typeID', 'published', 'marketGroupID'])
    types = types.query('published == 1 and marketGroupID != "None"')

    with gzip.open('data/history-latest.csv.gz', 'wt') as csvfile:
        fieldnames = [
            'type_id', 'region_id', 'date', 'lowest', 'highest', 'average',
            'volume', 'order_count'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
addressEndpoint = '/api/Wallet/addresses'
walletEndpoint = '/api/Wallet/files'
buildTxEndpoint = '/api/Wallet/build-transaction'
sendTxEndpoint = '/api/Wallet/send-transaction'
mnemonicEndpoint = '/api/Wallet/mnemonic'
createWalletEndpoint = '/api/Wallet/create'
restoreWalletEndpoint = '/api/Wallet/recover'
nodeStatusEndpoint = '/api/Dashboard/Stats'

apiSession = requests.session()
futuresSession = FuturesSession()
retryCount = Retry(total=3, backoff_factor=0.1,
                   status_forcelist=(400, 500, 502, 504))
apiSession.mount('http://', HTTPAdapter(max_retries=retryCount))
apiSession.mount('https://', HTTPAdapter(max_retries=retryCount))
futuresSession.mount('http://', HTTPAdapter(max_retries=retryCount))
futuresSession.mount('https://', HTTPAdapter(max_retries=retryCount))

# refresh interval
secToRefresh = int(xConfig['SETTINGS']['REFRESH_INTERVAL'])
secCounter = 0

# GUI
QFontDatabase.addApplicationFont(":/base/Roboto-Regular.ttf")
app.setFont(QFont("Roboto"))
mainWin = QStackedWidget()
walletPage = QWidget()
dashboardPage = QWidget()
sendPage = QWidget()
settingsPage = QWidget()
createRestorePage = QWidget()
class Dota_API(): api_keys = [ 'FE70CE9FC0D6D99279498CE852587F59', '2FEC67172AAC0C393EC209A225A7E51E' ] api_key_num = 1 api_key = api_keys[api_key_num] ips = ['162.213.199.143', '162.213.199.31'] ip_num = 0 data_source = 4 headers = {'User-Agent': 'Script by Grue'} errors = 0 session = FuturesSession() session.mount('http://', source.SourceAddressAdapter(ips[ip_num])) def matches_get(self, req_type=1, n_id='', **kwargs): if (req_type < 4): url = 'https://api.steampowered.com/IDOTA2Match_570/' url += 'GetMatchHistoryBySequenceNum' if req_type == 1 else 'GetMatchHistory' url += '/V001/?key=' + self.api_key + '&min_players=10&' if (req_type == 1): url += 'start_at_match_seq_num' elif (req_type == 2): url += 'start_at_match_id' elif (req_type == 3): url += 'account_id' url += '=' + str(n_id) if req_type != 1: url += '&skill=3' elif req_type == 4: url = 'http://www.dotabuff.com/matches/' + str(n_id) elif req_type == 5: url = 'http://dotamax.com/match/detail/' + str(n_id) elif req_type == 6: url = 'http://api.opendota.com/api/matches/' + str(n_id) return dict(req=self.session.get(url, timeout=7, headers=self.headers), req_type=req_type, n_id=n_id, url=url, ip_num=self.ip_num) def matches_result(self, request): req = request['req'] try: res = req.result() except (requests.ConnectionError, requests.Timeout, socket.timeout) as e: return self.retry_request(request) if (res.status_code != 200): if (res.status_code == 404 and request['req_type'] == 6): #not found return None self.session = FuturesSession() # if last IP cycle through data sources if self.ip_num == len(self.ips) - 1: if request['req_type'] == 4 or request['req_type'] == 6: request['req_type'] = (4 if (request['req_type'] == 6) else 6) self.data_source = request['req_type'] self.ip_num = (request['ip_num'] + 1) % len(self.ips) self.session.mount( 'http://', source.SourceAddressAdapter(self.ips[self.ip_num])) return self.retry_request(request, sleep=1) if request['req_type'] == 4: return self.parse_skill(res) if request['req_type'] == 5: return self.parse_dota_max(res) if request['req_type'] == 6: #switch IPs and wait 0.5 seconds so that it is 1 request per second per IP time.sleep(1 / len(self.ips)) self.ip_num = (request['ip_num'] + 1) % len(self.ips) self.session.mount( 'http://', source.SourceAddressAdapter(self.ips[self.ip_num])) return self.parse_opendota_skill(res) try: matches = res.json()['result']['matches'] except: if (request['req_type'] == 3): return [] return self.retry_request(request) if len(matches) == 0: return self.retry_request(request) return matches def retry_request(self, request, sleep=7): #print(request) self.errors += 1 time.sleep(sleep) return self.matches_result(self.matches_get(**request)) def parse_skill(self, response): html = response.text end_index = html.find(' Skill</dd>') if end_index > -1: html = html[:end_index] else: return None start_index = html.rfind('<dd>') if start_index > -1: html = html[start_index + 4:] else: return None return html def parse_dota_max(self, response): html = response.text html_split = html.split('<td><font style="color: #f0a868;">') if len(html_split) > 1: html = html_split[1] else: return None html_split = html.split('</font></td>') if len(html_split) > 1: html = html_split[0] else: return None return html def parse_opendota_skill(self, response): m = response.json() if 'skill' not in m: return None if m['skill'] == 3: return 'Very High' return m['skill']
class AsyncConnection(AbstractConnection): def __init__(self, *, base_url, disable_ssl_certificate, token_manager, retries, max_requests_workers=6, proxy_url=None): super().__init__(base_url=base_url, disable_ssl_certificate=disable_ssl_certificate, token_manager=token_manager, retries=retries) executor = cf.ThreadPoolExecutor(max_workers=max_requests_workers) adapter_kwargs = { 'pool_connections': max_requests_workers, 'pool_maxsize': max_requests_workers, 'max_retries': self._retries, 'pool_block': True } self._asession = FuturesSession(executor=executor) self._asession.mount('https://', HTTPAdapter(**adapter_kwargs)) self._asession.mount('http://', HTTPAdapter(**adapter_kwargs)) if proxy_url is not None: self._asession.proxies = { 'http': proxy_url, 'https': proxy_url, } self._access_token_lock = Lock() self._max_requests_workers = max_requests_workers @property def executor(self): return self._asession.executor @property def max_request_workers(self): return self._max_requests_workers def _add_authorization_maybe(self, headers: dict, url: str): with self._access_token_lock: super()._add_authorization_maybe(headers, url) def post(self, path, headers=None, callback=None, data=None, timeout=30.0): url = urljoin(self._base_url, path) params = { 'method': 'POST', 'url': url, 'headers': headers, 'data': data, 'verify': (not self._disable_ssl_certificate), 'timeout': timeout } return self._send_request(params, on_finish_callback=callback) def put(self, path, headers=None, callback=None, files=None, timeout=30.0): url = urljoin(self._base_url, self._encode_spaces(path)) params = { 'method': 'PUT', 'url': url, 'headers': headers, 'files': files, 'verify': (not self._disable_ssl_certificate), 'timeout': timeout } return self._send_request(params=params, on_finish_callback=callback) def _send_request(self, params, on_finish_callback): params['headers'] = params['headers'] or {} self._add_authorization_maybe(params['headers'], params['url']) self._add_user_agent(params['headers']) try: token = params['headers']['Authorization'].split( 'Bearer')[1].strip() except KeyError: token = None def extended_callback(response, *args, **kwargs): if response.status_code == 401: LOGGER.debug('Got a 401 status') skip = self._skip_token_renewal(params['url']) if not skip: with self._access_token_lock: # block concurrent send requests renewed = (token != self._token_manager.token.access_token) if renewed: LOGGER.debug('Token already renewed') else: self._renew_token() if on_finish_callback: on_finish_callback(response) c_params = params c_params['hooks'] = {'response': extended_callback} LOGGER.debug('Making request {} to {}'.format(params['method'], params['url'])) return self._asession.request(**c_params)
class MultiRequest(object): """Wraps requests-futures to make simultaneous HTTP requests. Can use a RateLimiter to limit # of outstanding requests. Can also use AvailabilityLimiter to limit total # of request issuance threshold. `multi_get` and `multi_post` try to be smart about how many requests to issue: * One url & one param - One request will be made. * Multiple url & one query param - Multiple requests will be made, with differing urls and the same query param. * Multiple url & multiple query params - Multiple requests will be made, with the same url and differing query params. """ _VERB_GET = 'GET' _VERB_POST = 'POST' def __init__( self, default_headers=None, max_requests=10, rate_limit=0, req_timeout=None, max_retry=10, total_retry=100, drop_404s=False, ): """Create the MultiRequest. Args: default_headers - A dict of headers which will be added to every request max_requests - Maximum number of requests to issue at once rate_limit - Maximum number of requests to issue per second req_timeout - Maximum number of seconds to wait without reading a response byte before deciding an error has occurred max_retry - The total number of attempts to retry a single batch of requests total_retry - The total number of request retries that can be made through the entire session Note there is a difference between `max_retry` and `total_retry`: - `max_retry` refers to how many times a batch of requests will be re-issued collectively - `total_retry` refers to a limit on the total number of outstanding requests made Once the latter is exhausted, no failed request within the whole session will be retried. """ self._default_headers = default_headers self._max_requests = max_requests self._req_timeout = req_timeout or 25.0 self._max_retry = max_retry self._drop_404s = drop_404s self._rate_limiter = RateLimiter(rate_limit) if rate_limit else None self._availability_limiter = AvailabilityLimiter( total_retry) if total_retry else None self._session = FuturesSession(max_workers=max_requests) retries = Retry(total=0, status_forcelist=[500, 502, 503, 504], raise_on_status=True) self._session.mount( 'https://', SSLAdapter( max_retries=retries, pool_maxsize=max_requests, pool_connections=max_requests, ), ) def multi_get(self, urls, query_params=None, to_json=True, file_download=False): """Issue multiple GET requests. Args: urls - A string URL or list of string URLs query_params - None, a dict, or a list of dicts representing the query params to_json - A boolean, should the responses be returned as JSON blobs file_download - A boolean, whether a file download is expected Returns: a list of dicts if to_json is set of requests.response otherwise. Raises: InvalidRequestError - Can not decide how many requests to issue. """ return self._multi_request( MultiRequest._VERB_GET, urls, query_params, data=None, to_json=to_json, file_download=file_download, ) def multi_post(self, urls, query_params=None, data=None, to_json=True, send_as_file=False): """Issue multiple POST requests. Args: urls - A string URL or list of string URLs query_params - None, a dict, or a list of dicts representing the query params data - None, a dict or string, or a list of dicts and strings representing the data body. to_json - A boolean, should the responses be returned as JSON blobs send_as_file - A boolean, should the data be sent as a file. Returns: a list of dicts if to_json is set of requests.response otherwise. Raises: InvalidRequestError - Can not decide how many requests to issue. 
""" return self._multi_request( MultiRequest._VERB_POST, urls, query_params, data, to_json=to_json, send_as_file=send_as_file, ) def _create_request(self, verb, url, query_params=None, data=None, send_as_file=False): """Helper method to create a single post/get requests. Args: verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET url - A string URL query_params - None or a dict data - None or a string or a dict send_as_file - A boolean, should the data be sent as a file. Returns: requests.PreparedRequest Raises: InvalidRequestError - if an invalid verb is passed in. """ # Prepare a set of kwargs to make it easier to avoid missing default params. kwargs = { 'headers': self._default_headers, 'params': query_params, 'timeout': self._req_timeout, } if MultiRequest._VERB_POST == verb: if send_as_file: kwargs['files'] = {'file': data} else: kwargs['data'] = data return PreparedRequest(partial(self._session.post, url, **kwargs), url) elif MultiRequest._VERB_GET == verb: return PreparedRequest(partial(self._session.get, url, **kwargs), url) else: raise InvalidRequestError('Invalid verb {0}'.format(verb)) def _zip_request_params(self, urls, query_params, data): """Massages inputs and returns a list of 3-tuples zipping them up. This is all the smarts behind deciding how many requests to issue. It's fine for an input to have 0, 1, or a list of values. If there are two inputs each with a list of values, the cardinality of those lists much match. Args: urls - 1 string URL or a list of URLs query_params - None, 1 dict, or a list of dicts data - None, 1 dict or string, or a list of dicts or strings Returns: A list of 3-tuples (url, query_param, data) Raises: InvalidRequestError - if cardinality of lists does not match """ # Everybody gets to be a list if not isinstance(urls, list): urls = [urls] if not isinstance(query_params, list): query_params = [query_params] if not isinstance(data, list): data = [data] # Counts must not mismatch url_count = len(urls) query_param_count = len(query_params) data_count = len(data) max_count = max(url_count, query_param_count, data_count) if (max_count > url_count > 1 or max_count > query_param_count > 1 or max_count > data_count > 1): raise InvalidRequestError( 'Mismatched parameter count url_count:{0} query_param_count:{1} data_count:{2} max_count:{3}', url_count, query_param_count, data_count, max_count, ) # Pad out lists if url_count < max_count: urls = urls * max_count if query_param_count < max_count: query_params = query_params * max_count if data_count < max_count: data = data * max_count return list(zip(urls, query_params, data)) def _wait_for_response(self, requests): """Issues a batch of requests and waits for the responses. If some of the requests fail it will retry the failed ones up to `_max_retry` times. Args: requests - A list of requests Returns: A list of `requests.models.Response` objects Raises: InvalidRequestError - if any of the requests returns "403 Forbidden" response """ failed_requests = [] responses_for_requests = OrderedDict.fromkeys(requests) for retry in range(self._max_retry): try: logging.debug('Try #{0}'.format(retry + 1)) self._availability_limiter.map_with_retries( requests, responses_for_requests) failed_requests = [] for request, response in responses_for_requests.items(): if self._drop_404s and response is not None and response.status_code == 404: logging.warning( 'Request to {0} failed with status code 404, dropping.' 
.format(request.url)) elif not response: failed_requests.append((request, response)) if not failed_requests: break logging.warning( 'Try #{0}. Expected {1} successful response(s) but only got {2}.' .format( retry + 1, len(requests), len(requests) - len(failed_requests), )) # retry only for the failed requests requests = [fr[0] for fr in failed_requests] except InvalidRequestError: raise except Exception as e: # log the exception for the informative purposes and pass to the next iteration logging.exception( 'Try #{0}. Exception occured: {1}. Retrying.'.format( retry + 1, e)) pass if failed_requests: logging.warning( 'Still {0} failed request(s) after {1} retries:'.format( len(failed_requests), self._max_retry, )) for failed_request, failed_response in failed_requests: if failed_response is not None: # in case response text does contain some non-ascii characters failed_response_text = failed_response.text.encode( 'ascii', 'xmlcharrefreplace') logging.warning( 'Request to {0} failed with status code {1}. Response text: {2}' .format( failed_request.url, failed_response.status_code, failed_response_text, )) else: logging.warning( 'Request to {0} failed with None response.'.format( failed_request.url)) return list(responses_for_requests.values()) def _handle_file_download(self, response): name = None data = None try: name = re.findall('filename=(.+)', response.headers['content-disposition'])[0] data = urlsafe_b64encode( response.text.encode('utf-8')).decode('utf-8') except Exception: logging.exception('Unable to extract download data for {} '.format( response.request.url)) return {'data': {'id': name, 'text': data}} def _convert_to_json(self, response): """Converts response to JSON. If the response cannot be converted to JSON then `None` is returned. Args: response - An object of type `requests.models.Response` Returns: Response in JSON format if the response can be converted to JSON. `None` otherwise. """ try: return response.json() except ValueError: logging.warning( 'Expected response in JSON format from {0} but the actual response text is: {1}' .format( response.request.url, response.text, )) return None def _multi_request(self, verb, urls, query_params, data, to_json=True, send_as_file=False, file_download=False): """Issues multiple batches of simultaneous HTTP requests and waits for responses. Args: verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET urls - A string URL or list of string URLs query_params - None, a dict, or a list of dicts representing the query params data - None, a dict or string, or a list of dicts and strings representing the data body. 
to_json - A boolean, should the responses be returned as JSON blobs Returns: If multiple requests are made - a list of dicts if to_json, a list of requests responses otherwise If a single request is made, the return is not a list Raises: InvalidRequestError - if no URL is supplied or if any of the requests returns 403 Access Forbidden response """ if not urls: raise InvalidRequestError('No URL supplied') # Break the params into batches of request_params request_params = self._zip_request_params(urls, query_params, data) batch_of_params = [ request_params[pos:pos + self._max_requests] for pos in range(0, len(request_params), self._max_requests) ] # Iteratively issue each batch, applying the rate limiter if necessary all_responses = [] for param_batch in batch_of_params: if self._rate_limiter: self._rate_limiter.make_calls(num_calls=len(param_batch)) prepared_requests = [ self._create_request( verb, url, query_params=query_param, data=datum, send_as_file=send_as_file, ) for url, query_param, datum in param_batch ] responses = self._wait_for_response(prepared_requests) for response in responses: if response and not file_download: all_responses.append( self._convert_to_json(response ) if to_json else response) elif file_download: all_responses.append(self._handle_file_download(response)) else: all_responses.append(None) return all_responses def post_file(self, url, file, to_json=True): request = self._create_request(MultiRequest._VERB_POST, url) return request @classmethod def error_handling(cls, fn): """Decorator to handle errors""" def wrapper(*args, **kwargs): try: result = fn(*args, **kwargs) return result except InvalidRequestError as e: write_exception(e) if hasattr(e, 'request'): write_error_message('request {0}'.format(repr(e.request))) if hasattr(e, 'response'): write_error_message('response {0}'.format(repr( e.response))) raise e return wrapper
class Track(object): CRASH_STATS_URL = 'https://crash-stats.mozilla.com' SUPERSEARCH_URL = CRASH_STATS_URL + '/api/SuperSearch' TIMEOUT = 5 MAX_RETRIES = 5 MAX_WORKERS = multiprocessing.cpu_count() HG_PATTERN = re.compile('hg:hg.mozilla.org/mozilla-central:([^:]*):([a-z0-9]+)') def __init__(self, signature, day, day_delta = 1, credentials = None): self.results = [ ] self.credentials = credentials self.has_results = False self.day_delta = day_delta self.signature = signature self.info = { } self.date = utils.get_date_ymd(day) self.session = FuturesSession(max_workers = self.MAX_WORKERS) self.session.mount(self.CRASH_STATS_URL, HTTPAdapter(max_retries = self.MAX_RETRIES)) self.__get_info() def get(self): if not self.has_results: for r in self.results: r.result() self.has_results = True return self.info def has_addons(self): return len(self.get()['addons']) != 0 def __get_apikey(self): if self.credentials: return self.credentials['tokens'][self.CRASH_STATS_URL] else: return '' @staticmethod def __get_stats(info, field): l = info[field] total = float(info['total']) stats = { } for e in l: stats[e['term']] = utils.percent(float(e['count']) / total) return stats @staticmethod def __get_system_memory_use_mean(info): l = info['system_memory_use_percentage'] total = float(info['total']) l = [(float(e['count']) / total, float(e['term'])) for e in l] m = 0. for e in l: m += e[0] * e[1] v = 0. for e in l: v += e[0] * (m - e[1]) ** 2 return {'mean': utils.simple_percent(round(m, 0)), 'stddev': utils.simple_percent(round(math.sqrt(v), 0))} @staticmethod def __is_weird_address(addr, cpu_name): if addr == '0x0': return True if utils.is64(cpu_name): if len(addr) <= 10: val = long(addr, 16) if val <= 1L << 16: # val <= 0xffff (ie: first 64k) return True elif addr.startswith('0xffffffff'): addr = addr[10:] # 10 == len('0xffffffff') val = long(addr, 16) if val >= ((1L << 32) - (1L << 16)): # val >= 0xfffffffffff0000 (ie: last 64k) return True
it is simply not checked. Generously provided by Juan Luis Boya """ def cert_verify(self, conn, *args, **kwargs): """ Skips verification of the SSL hostname field :param Connection conn: The connection object """ super(NotCheckingHostnameHTTPAdapter, self).cert_verify(conn, *args, **kwargs) conn.assert_hostname = False # With this adapter mounted, no hostname is checked futures_session = FuturesSession() futures_session.mount('https://', NotCheckingHostnameHTTPAdapter()) # Create the temporary directory if it does not exist if not os.path.exists(conf.TMPDIR): os.makedirs(conf.TMPDIR) __UPLOADS__ = conf.TMPDIR # temporary directory where files will be stored open_ws = set() # Set of currently alive websockets class BaseHandler(RequestHandler): """ The base class which the other HTTP handlers extend. Provides secure cookie decryption and error handling """ def get_current_user(self):
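# The adapter above disables hostname checking for every request routed through the
# 'https://' prefix. If only one internal endpoint needs the relaxed check, the mount
# can be scoped to that host so all other HTTPS traffic keeps full verification. A
# sketch, assuming the NotCheckingHostnameHTTPAdapter defined above and a hypothetical
# internal host:
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession

relaxed_session = FuturesSession()
# requests picks the most specific matching prefix, so only this host skips the check.
relaxed_session.mount('https://internal.example.org', NotCheckingHostnameHTTPAdapter())
relaxed_session.mount('https://', HTTPAdapter())  # everything else keeps normal verification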
class BaseConnection(object): """Base Connection Class.""" def __init__(self, debug=False, method='GET', proxy_host=None, timeout=20, proxy_port=80, parallel=None, escape_xml=False, **kwargs): if debug: set_stream_logger() self.response = None self.request = None self.verb = None self.config = None self.debug = debug self.method = method self.timeout = timeout self.proxy_host = proxy_host self.proxy_port = proxy_port self.escape_xml = escape_xml self.datetime_nodes = [] self._list_nodes = [] self.proxies = dict() if self.proxy_host: proxy = 'http://%s:%s' % (self.proxy_host, self.proxy_port) self.proxies = {'http': proxy, 'https': proxy} self.session = FuturesSession() self.session.mount('http://', HTTPAdapter(max_retries=3)) self.session.mount('https://', HTTPAdapter(max_retries=3)) self.parallel = parallel self.base_list_nodes = [] self.datetime_nodes = [] self._reset() def debug_callback(self, debug_type, debug_message): log.debug('type: ' + str(debug_type) + ' message' + str(debug_message)) def v(self, *args, **kwargs): return getValue(self.response.dict(), *args, **kwargs) def getNodeText(self, nodelist): return getNodeTextUtils(nodelist) def _reset(self): self.response = None self.request = None self.verb = None self._list_nodes = [] self._request_id = None self._request_dict = {} self._time = time.time() self._response_content = None self._response_dom = None self._response_obj = None self._response_soup = None self._response_dict = None self._response_error = None self._resp_body_errors = [] self._resp_body_warnings = [] self._resp_codes = [] def _add_prefix(self, nodes, verb): if verb: for i, v in enumerate(nodes): if not nodes[i].startswith(verb.lower()): nodes[i] = "%sresponse.%s" % (verb.lower(), nodes[i].lower()) def execute(self, verb, data=None, list_nodes=[], verb_attrs=None, files=None): "Executes the HTTP request." 
log.debug('execute: verb=%s data=%s' % (verb, data)) self._reset() self._list_nodes += list_nodes self._add_prefix(self._list_nodes, verb) if hasattr(self, 'base_list_nodes'): self._list_nodes += self.base_list_nodes self.build_request(verb, data, verb_attrs, files) self.execute_request() if hasattr(self.response, 'content'): self.process_response() self.error_check() log.debug('total time=%s' % (time.time() - self._time)) return self.response def build_request(self, verb, data, verb_attrs, files=None): self.verb = verb self._request_dict = data self._request_id = uuid.uuid4() url = self.build_request_url(verb) headers = self.build_request_headers(verb) headers.update({ 'User-Agent': UserAgent, 'X-EBAY-SDK-REQUEST-ID': str(self._request_id) }) # if we are adding files, we ensure there is no Content-Type header already defined # otherwise Request will use the existing one which is likely not to be multipart/form-data # data must also be a dict so we make it so if needed requestData = self.build_request_data(verb, data, verb_attrs) if files: del (headers['Content-Type']) if isinstance(requestData, str): # pylint: disable-msg=E0602 requestData = {'XMLPayload': requestData} request = Request( self.method, url, data=smart_encode_request_data(requestData), headers=headers, files=files, ) self.request = request.prepare() def build_request_headers(self, verb): return {} def build_request_data(self, verb, data, verb_attrs): return "" def build_request_url(self, verb): url = "%s://%s%s" % (HTTP_SSL[self.config.get('https', False)], self.config.get('domain'), self.config.get('uri')) return url def execute_request(self): log.debug("REQUEST (%s): %s %s" % (self._request_id, self.request.method, self.request.url)) log.debug('headers=%s' % self.request.headers) log.debug('body=%s' % self.request.body) if self.parallel: self.parallel._add_request(self) return None self.response = self.session.send(self.request, verify=True, proxies=self.proxies, timeout=self.timeout, allow_redirects=True) log.debug('RESPONSE (%s):' % self._request_id) log.debug('elapsed time=%s' % self.response.elapsed) log.debug('status code=%s' % self.response.status_code) log.debug('headers=%s' % self.response.headers) log.debug('content=%s' % self.response.text) def process_response(self, parse_response=True): """Post processing of the response""" self.response = Response(self.response, verb=self.verb, list_nodes=self._list_nodes, datetime_nodes=self.datetime_nodes, parse_response=parse_response) self.session.close() # set for backward compatibility self._response_content = self.response.content if self.response.status_code != 200: self._response_error = self.response.reason def error_check(self): estr = self.error() if estr and self.config.get('errors', True): log.error(estr) raise ConnectionError(estr, self.response) def response_codes(self): return self._resp_codes def response_status(self): "Retuns the HTTP response status string." return self.response.reason def response_code(self): "Returns the HTTP response status code." return self.response.status_code def response_content(self): return self.response.content def response_soup(self): "Returns a BeautifulSoup object of the response." 
if not self._response_soup: try: from bs4 import BeautifulStoneSoup except ImportError: from BeautifulSoup import BeautifulStoneSoup log.warn( 'DeprecationWarning: BeautifulSoup 3 or earlier is deprecated; install bs4 instead\n' ) self._response_soup = BeautifulStoneSoup( smart_decode(self.response_content)) return self._response_soup def response_obj(self): log.warn('response_obj() DEPRECATED, use response.reply instead') return self.response.reply def response_dom(self): """ Deprecated: use self.response.dom() instead Returns the response DOM (xml.dom.minidom). """ log.warn('response_dom() DEPRECATED, use response.dom instead') if not self._response_dom: dom = None content = None try: if self.response.content: regex = re.compile(b'xmlns="[^"]+"') content = regex.sub(b'', self.response.content) else: content = "<%sResponse></%sResponse>" % (self.verb, self.verb) dom = parseString(content) self._response_dom = dom.getElementsByTagName(self.verb + 'Response')[0] except ExpatError as e: raise ConnectionResponseError( "Invalid Verb: %s (%s)" % (self.verb, e), self.response) except IndexError: self._response_dom = dom return self._response_dom def response_dict(self): "Returns the response dictionary." log.warn( 'response_dict() DEPRECATED, use response.dict() or response.reply instead' ) return self.response.reply def response_json(self): "Returns the response JSON." log.warn('response_json() DEPRECATED, use response.json() instead') return self.response.json() def _get_resp_body_errors(self): """Parses the response content to pull errors. Child classes should override this method based on what the errors in the XML response body look like. They can choose to look at the 'ack', 'Errors', 'errorMessage' or whatever other fields the service returns. the implementation below is the original code that was part of error() """ if self._resp_body_errors and len(self._resp_body_errors) > 0: return self._resp_body_errors errors = [] if self.verb is None: return errors dom = self.response.dom() if dom is None: return errors return [] def error(self): "Builds and returns the api error message." error_array = [] if self._response_error: error_array.append(self._response_error) error_array.extend(self._get_resp_body_errors()) if len(error_array) > 0: # Force all errors to be unicode in a proper way error_array = [smart_decode(smart_encode(e)) for e in error_array] error_string = u"{verb}: {message}".format( verb=self.verb, message=u", ".join(error_array)) return error_string return None def opendoc(self): webbrowser.open(self.config.get('doc_url'))
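# In requests-futures, the thread pool wraps Session.request() and the get/post/...
# helpers built on it, while send() on a PreparedRequest is inherited unchanged from
# requests.Session and blocks; that is why execute_request() above can read
# response.elapsed immediately. A minimal sketch of that prepare-and-send flow, with a
# placeholder URL:
from requests import Request
from requests_futures.sessions import FuturesSession

session = FuturesSession()
prepared = Request('GET', 'https://api.example.com/ping').prepare()
response = session.send(prepared, timeout=20, allow_redirects=True)  # blocking call
print(response.status_code, response.elapsed)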
class BZInfo(object): BZ_URL = 'https://bugzilla.mozilla.org' API_URL = BZ_URL + '/rest/bug' TIMEOUT = 60 MAX_RETRIES = 5 MAX_WORKERS = multiprocessing.cpu_count() CHUNK_SIZE = 8 def __init__(self, bugids, credentials = None): self.results = [ ] self.credentials = credentials self.bugids = map(str, bugids) self.info = { } for bugid in self.bugids: self.info[bugid] = { 'ownership': [], 'reviewers': set(), 'commenters': { }, 'authorized': False } self.session = FuturesSession(max_workers = self.MAX_WORKERS) self.session.mount(self.BZ_URL, HTTPAdapter(max_retries = self.MAX_RETRIES)) self.reply_pattern = re.compile('^\(In reply to .* comment #([0-9]+)\)') self.dupbug_pattern = re.compile('\*\*\* Bug [0-9]+ has been marked as a duplicate of this bug. \*\*\*') self.review_pattern= re.compile('review\?\(([^\)]+)\)') self.needinfo_pattern= re.compile('needinfo\?\(([^\)]+)\)') self.feedback_pattern= re.compile('feedback\?\(([^\)]+)\)') self.__get_info() self.__analyze_history() self.__analyze_comment() def get(self): for r in self.results: r.result() return self.info def get_best_collaborator(self): # a collaboration between A & B is when A reviews a patch of B (or reciprocally) # in term of graph: # - each node represents a reviewer or a writter (owner) # - each edge represents a collaboration # here we count the degree of each node and find out who's the best collaborator # TODO: use this graph to get other metrics (??) # it could be interesting to weight each contribution according to its date # someone who made 20 contribs recently is probably better than someone 50 contribs # two years ago... # TODO: We could weight a contrib with a gaussian which depends to the time collaborations = { } for info in self.get().itervalues(): if info['authorized']: owner = info['owner'] if owner not in collaborations: collaborations[owner] = 0 reviewers = info['reviewers'] feedbacks = info['feedbacks'] collabs = set() if reviewers and owner in reviewers: collabs |= reviewers[owner] if feedbacks and owner in feedbacks: collabs |= feedbacks[owner] if collabs: collaborations[owner] += len(collabs) for person in collabs: collaborations[person] = collaborations[person] + 1 if person in collaborations else 1 # maybe we should compute the percentage of collaborations just to give an idea return utils.get_best(collaborations) def get_best_component_product(self): # Just get stats on components and products comps_prods = { } for info in self.get().itervalues(): if info['authorized']: comp_prod = (info['component'], info['product']) comps_prods[comp_prod] = comps_prods[comp_prod] + 1 if comp_prod in comps_prods else 1 if comps_prods: return utils.get_best(comps_prods) else: return None def __get_apikey(self): if self.credentials: return self.credentials['tokens'][self.BZ_URL] else: return '' def __info_cb(self, sess, res): bugs = res.json()['bugs'] for bug in bugs: self.info[str(bug['id'])].update({ 'authorized': True, 'severity': bug['severity'], 'votes': bug['votes'], 'component': bug['component'], 'product': bug['product'], 'nbcc': len(bug['cc']), 'reporter': bug['creator'], 'owner': bug['assigned_to_detail']['email']}) def __get_info(self): def chunks(): for i in range(0, len(self.bugids), self.CHUNK_SIZE): yield self.bugids[i:(i + self.CHUNK_SIZE)] for bugids in chunks(): bugids = ','.join(map(str, bugids)) self.results.append(self.session.get(self.API_URL, params = {'api_key': self.__get_apikey(), 'id': bugids}, verify = True, timeout = self.TIMEOUT, background_callback = self.__info_cb)) def __history_cb(self, 
sess, res): if res.status_code == 200: json = res.json() ownership = [] reviewers = { } feedbacks = { } if 'bugs' in json and json['bugs']: bug = json['bugs'][0] bugid = str(bug['id']) history = bug['history'] for h in history: who = h['who'] owner = None changes = h['changes'] for change in changes: nam = change['field_name'] rem = change['removed'] add = change['added'] if nam == 'status': if rem == 'NEW' and add == 'ASSIGNED': owner = who elif nam == 'assigned_to': owner = add elif nam == 'flagtypes.name': # Get the reviewers for m in self.review_pattern.finditer(add): if who in reviewers: reviewers[who].add(m.group(1)) else: reviewers[who] = set([m.group(1)]) # Get people pinged for feedback for m in self.feedback_pattern.finditer(add): if who in feedbacks: feedbacks[who].add(m.group(1)) else: feedbacks[who] = set([m.group(1)]) if owner and (not ownership or ownership[-1]['owner'] != owner): ownership.append({ 'owner': owner, 'touch_by': who, 'touch_when': h['when']} ) self.info[bugid].update({ 'ownership': ownership, 'reviewers': reviewers, 'feedbacks': feedbacks}) def __analyze_history(self): for bugid in self.bugids: self.results.append(self.session.get(self.API_URL + '/' + bugid + '/history', params = { 'api_key': self.__get_apikey() }, timeout = self.TIMEOUT, background_callback = self.__history_cb)) def __comment_cb(self, sess, res): if res.status_code == 200: json = res.json() if 'bugs' in json: bugs = json['bugs'] keys = bugs.keys() bugid = '' for key in keys: if isinstance(key, basestring) and key.isdigit(): bugid = key break if bugid: commenters = { } bug = bugs[bugid] if 'comments' in bug: comments = bug['comments'] authors = [] for comment in comments: text = comment['text'] if not self.dupbug_pattern.match(text): author = comment['author'] authors.append(author) if author not in commenters: commenters[author] = [] for m in self.reply_pattern.finditer(comment['raw_text']): n = int(m.group(1)) if n >= 1 and n <= len(authors): commenters[authors[n - 1]].append(author) self.info[bugid].update({'commenters': commenters}) def __analyze_comment(self): for bugid in self.bugids: self.results.append(self.session.get(self.API_URL + '/' + bugid + '/comment', params = {'api_key': self.__get_apikey()}, timeout = self.TIMEOUT, background_callback = self.__comment_cb))
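# __get_info() above batches bug ids into comma-separated chunks so that each future
# covers CHUNK_SIZE bugs instead of one request per bug. The same chunking pattern in
# isolation; the endpoint and chunk size are illustrative.
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession

def fetch_in_chunks(ids, url, chunk_size=8, timeout=60):
    session = FuturesSession(max_workers=4)
    session.mount(url, HTTPAdapter(max_retries=5))
    futures = []
    for i in range(0, len(ids), chunk_size):
        chunk = ','.join(str(x) for x in ids[i:i + chunk_size])
        futures.append(session.get(url, params={'id': chunk}, timeout=timeout))
    return [f.result() for f in futures]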
def retrieve(self, catalog, *, dry_run=False, media_type=''): if not dry_run: distributions = Distribution.objects.filter( division_id=catalog.division_id, http_status_code__isnull=True) if media_type: distributions = distributions.filter(mediaType=media_type) if not distributions.exists(): return # Collect the distribution-response pairs. def callback(distribution, response): results.append([distribution, response]) # Create a closure. def factory(distribution): return lambda session, response: callback( distribution, response) # @see http://docs.python-requests.org/en/latest/api/#requests.adapters.HTTPAdapter # @see https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py session = FuturesSession() # Avoids "Connection pool is full, discarding connection" warnings. adapter_kwargs = {'pool_maxsize': 10} session.mount('https://', requests.adapters.HTTPAdapter(**adapter_kwargs)) session.mount('http://', requests.adapters.HTTPAdapter(**adapter_kwargs)) # @see https://djangosnippets.org/snippets/1949/ pk = 0 last_pk = distributions.order_by('-pk')[0].pk distributions = distributions.order_by('pk') while pk < last_pk: # @see https://github.com/ross/requests-futures/issues/18 # @see https://github.com/ross/requests-futures/issues/5 futures = [] results = [] # If an exception occurs, we lose progress on at most 100 requests. for distribution in distributions.filter(pk__gt=pk)[:100]: pk = distribution.pk # @see http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow # @see http://stackoverflow.com/a/845595/244258 futures.append( session.get(quote(distribution.accessURL, safe="%/:=&?~#+!$,;'@()*[]"), stream=True, verify=False, background_callback=factory(distribution))) for future in futures: try: future.result() except (requests.exceptions.ConnectionError, requests.exceptions.InvalidSchema, requests.exceptions.InvalidURL, requests.exceptions.MissingSchema, requests.exceptions.ReadTimeout, requests.exceptions.SSLError, requests.exceptions.TooManyRedirects, requests.packages.urllib3.exceptions.ProtocolError ): self.exception('') for distribution, response in results: status_code = response.status_code charset = '' content_length = response.headers.get('content-length') if content_length: content_length = int(content_length) # @see https://github.com/kennethreitz/requests/blob/b137472936cbe6a6acabab538c1d05ed4c7da638/requests/utils.py#L308 content_type = response.headers.get('content-type', '') if content_type: content_type, params = cgi.parse_header(content_type) if 'charset' in params: charset = params['charset'].strip("'\"") distribution.http_headers = dict(response.headers) distribution.http_status_code = status_code distribution.http_content_length = content_length distribution.http_content_type = content_type distribution.http_charset = charset distribution.save() self.debug('{} {} {}'.format( status_code, number_to_human_size(content_length), content_type)) response.close() self.info('{} done'.format(catalog))
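# retrieve() above raises pool_maxsize so the adapter's connection pool matches the
# number of in-flight requests and the "Connection pool is full, discarding connection"
# warnings disappear. The pairing in isolation; the worker count is illustrative.
import requests
from requests_futures.sessions import FuturesSession

workers = 10
session = FuturesSession(max_workers=workers)
adapter = requests.adapters.HTTPAdapter(pool_maxsize=workers)
session.mount('https://', adapter)
session.mount('http://', adapter)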
# Enable For Debugging: logging.disable(logging.INFO) DELAY = .05 # Delay in seconds between trade calculations RGAP = .005 # Max gap before cancelling a robux split trade TGAP = .0025 # Max gap before cancelling a tix split trade TRADE_LAG_TIME = 1.25 # Estimate of how long it takes for Roblox to process our requests RESET_TIME = 240 # Number of seconds the bot goes without trading before resetting last rates to be able to trade again (might result in loss) DEQUE_SIZE = 15 # Max number of past trade rates to keep track of to prevent money loss NUM_TRADES = 19 # Number of trades that display on the trade currency page # Initializing requests.Session for frozen application os.environ["REQUESTS_CA_BUNDLE"] = find_data_file('cacert.pem') session = FuturesSession(max_workers=15) adapter = requests.adapters.HTTPAdapter( max_retries=Retry(total=20, connect=10, read=10, backoff_factor=.5)) session.mount("http://", adapter) session.mount("https://", adapter) # Storing variables since they can't be stored in QObject rates = DottedDict( dict( last_tix_rate=0, last_robux_rate=0, current_tix_rate=0, current_robux_rate=0, past_tix_rates=deque(maxlen=DEQUE_SIZE), past_robux_rates=deque(maxlen=DEQUE_SIZE), )) class Trader(QtCore.QObject): def __init__(self, currency):
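# The Retry(total=20, connect=10, read=10, backoff_factor=.5) above makes urllib3 back
# off geometrically between attempts: the sleep grows as backoff_factor * 2 ** (n - 1)
# (the first retry is typically immediate and the sleep is capped by urllib3's backoff
# maximum, so the exact schedule depends on the urllib3 version). Rough arithmetic:
backoff_factor = 0.5
print([backoff_factor * 2 ** (n - 1) for n in range(1, 8)])
# [0.5, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0]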
class ResourceSyncPuSH(object): """ The base class for the publisher, hub and resource. Contains methods for reading config files, making http requests, error handling, etc. """ def __init__(self): """ Inititalizes the Futures-Requests session with the max number of workers and retires. """ # max workers and retries should be configurable? self.session = FuturesSession(max_workers=10) adapter = HTTPAdapter(max_retries=3) self.session.mount("http://", adapter) self._start_response = None # config parameters self.config = {} self.config['log_mode'] = "" self.config['mimetypes'] = [] self.config['trusted_publishers'] = [] self.config['trusted_topics'] = [] self.config['my_url'] = "" self.config['hub_url'] = "" self.config['topic_url'] = "" self.config['subscribers_file'] = "" self.config['server_path'] = "" # logging messages self.log_msg = {} self.log_msg['payload'] = "" self.log_msg['msg'] = [] self.log_msg['link_header'] = "" self.log_msg['module'] = "" def get_config(self, classname=None): """ Finds and reads the config file. Reads the appropriate config values for the classname provided. For eg: if the classname is hub, it will read from the [hub] section in the config file. """ if not classname: classname = self.__class__.__name__.lower() self.log_msg['module'] = classname # NOTE: more paths can be added to look for the config files. # order of files matter, the config in the first file # will be overwritten by the values in the next file. cnf_file = [] cnf_file.extend([ os.path.join(os.path.dirname(__file__), "../conf/resourcesync_push.ini"), "/etc/resourcesync_push.ini", "/etc/resourcesync_push/resourcesync_push.ini", ]) # loading values from configuration file conf = ConfigParser.ConfigParser() conf.read(cnf_file) if not conf: raise IOError("Unable to read config file") if classname == "hub": self.get_hub_config(conf) elif classname == "publisher": self.get_publisher_config(conf) elif classname == "subscriber": try: self.config['my_url'] = conf.get("subscriber", "url") except (NoSectionError, NoOptionError): print("The url value for subscriber is required \ in the config file.") raise self.get_demo_config(conf) def get_demo_config(self, conf): """ Reads the [demo_hub] section from the config file if the log mode is set to 'demo'. """ try: self.config['log_mode'] = conf.get("general", "log_mode") except (NoSectionError, NoOptionError): pass if not self.config['log_mode'] == "demo": return try: self.config['demo_hub_url'] = conf.get("demo_mode", "hub_url") except (NoSectionError, NoOptionError): print("Demo log mode requires a hub_url in the \ [demo_mode] section") raise try: self.config['demo_topic_url'] = conf.get("demo_mode", "topic_url") except (NoSectionError, NoOptionError): print("Demo log mode requires a topic_url in the \ [demo_mode] section") raise return def get_hub_config(self, conf): """ Reads the [hub] section from the config file. 
""" try: self.config['mimetypes'] = conf.get("hub", "mimetypes") except (NoSectionError, NoOptionError): # reourcesync hub by default self.config['mimetypes'] = "application/xml" try: self.config['trusted_publishers'] = conf.get( "hub", "trusted_publishers") except (NoSectionError, NoOptionError): # will allow any publisher self.config['trusted_publishers'] = [] try: self.config['trusted_topics'] = conf.get("hub", "trusted_topics") except (NoSectionError, NoOptionError): # will accept any topic self.config['trusted_topics'] = [] try: self.config['my_url'] = conf.get("hub", "url") except (NoSectionError, NoOptionError): print("The url value for hub is required in the config file.") raise self.config['subscribers_file'] = os.path.join( os.path.dirname(__file__), "../db/subscriptions.pk") try: self.config['subscribers_file'] = conf.get("hub", "subscribers_file") except (NoSectionError, NoOptionError): pass if not os.path.isfile(self.config['subscribers_file']): open(self.config['subscribers_file'], 'a').close() return def get_publisher_config(self, conf): """ Reads the [publisher] section in the config file. """ try: self.config['my_url'] = conf.get("publisher", "url") except (NoSectionError, NoOptionError): print("The url value for publisher is required \ in the config file.") raise try: self.config['server_path'] = conf.get("publisher", "server_path") except (NoSectionError, NoOptionError): pass try: self.config['hub_url'] = conf.get("publisher", "hub_url") except (NoSectionError, NoOptionError): print("The hub_url value for publisher is required \ in the config file.") raise try: self.config['topic_url'] = conf.get("publisher", "topic_url") except (NoSectionError, NoOptionError): print("The topic_url value for publisher is required \ in the config file.") raise def send(self, url, method='POST', data=None, callback=None, headers=None): """ Performs http post and get requests. Uses futures-requests to make (threaded) async requests. """ if method == 'POST': return self.session.post(url, data=data, background_callback=callback, headers=headers) elif method == 'GET': return self.session.get(url, headers=headers) elif method == 'HEAD': return self.session.head(url, headers=headers) else: return def respond(self, code=200, msg="OK", headers=None): """ Sends the appropriate http status code with an error message. """ print("HTTP %s: %s" % (code, msg)) if not headers: headers = [] if not str(code) == "204": headers.append(("Content-Type", "text/html")) code = str(code) + " " + HTTP_STATUS_CODE[code] self._start_response(code, headers) return [msg] @staticmethod def get_topic_hub_url(link_header): """ Uses the parse_header_links method in requests to parse link headers and return the topic and hub urls. """ links = parse_header_links(link_header) topic = "" hub_url = "" for link in links: if link.get('rel') == 'self': topic = link.get('url') elif link.get('rel') == 'hub': hub_url = link.get('url') return (topic, hub_url) def make_link_header(self, hub_url=None, topic_url=None): """ Constructs the resourcesync link header. """ if not hub_url and not topic_url: return self.respond(code=400, msg="hub and topic urls are not set \ in config file.") link_header = [] link_header.extend(["<", topic_url, ">;rel=", "self", ","]) link_header.extend([" <", hub_url, ">;rel=", "hub"]) return "".join(link_header) def log(self): """ Log handler. Will send the log info as json to the demo hub if log_mode value is set to demo in the config file. 
""" if self.config['log_mode'] == 'demo': headers = {} headers['Link'] = self.make_link_header( hub_url=self.config['demo_hub_url'], topic_url=self.config['demo_topic_url']) self.send(self.config['demo_hub_url'], data=json.dumps(self.log_msg), headers=headers) else: print(self.log_msg)
class HTTPDriver(BaseDriver): """HTTPDriver The :class:`HTTPDriver` class reads SBP messages from an HTTP service for a device and writes out to a stream. This driver is like a file-handle with read and writes over two separately HTTP connections, but can also be enabled and disabled by its consumer. Parameters ---------- device_uid : uid Device unique id url : str HTTP endpoint retries : tuple Configure connect and read retry count. Defaults to (MAX_CONNECT_RETRIES, MAX_READ_RETRIES). timeout : tuple Configure connect and read timeouts. Defaults to (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT). """ def __init__(self, device_uid=None, url="https://broker.staging.skylark.swiftnav.com", retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT,): self._retry = Retry(connect=DEFAULT_RETRIES[0], read=DEFAULT_RETRIES[1], redirect=MAX_REDIRECTS, status_forcelist=[500], backoff_factor=DEFAULT_BACKOFF_FACTOR) self.url = url self.read_session = requests.Session() self.read_session.mount("http://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.read_session.mount("https://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.write_session = None self.device_uid = device_uid self.timeout = timeout self.read_response = None self.write_response = None self.source = None def flush(self): """File-flush wrapper (noop). """ pass def close(self): """File-handle close wrapper (noop). """ try: self.read_close() self.write_close() except: pass @property def write_ok(self): """ Are we connected for writes? """ # Note that self.write_response is either None or a Response # object, which cast to False for 4xx and 5xx HTTP codes. return bool(self.write_response) def connect_write(self, source, whitelist, device_uid=None, pragma=None): """Initialize a streaming write HTTP response. Manually connects the underlying file-handle. In the event of a network disconnection, use to manually reinitiate an HTTP session. Parameters ---------- source : sbp.client.handler.Handler Iterable source of SBP messages. 
whitelist : [int] Whitelist of messages to write """ header_device_uid = device_uid or self.device_uid headers = {'Device-Uid': header_device_uid, 'Content-Type': BROKER_SBP_TYPE, 'Pragma': pragma} if not pragma: del headers['Pragma'] try: self.executor = ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE) self.write_session = FuturesSession(executor=self.executor) self.write_session.mount("http://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.write_session.mount("https://", HTTPAdapter(pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, pool_block=DEFAULT_POOLBLOCK, max_retries=self._retry)) self.source = source.filter(whitelist) gen = (msg.pack() for msg, _ in self.source) self.write_session.put(self.url, data=gen, headers=headers) self.write_response = True except requests.exceptions.ConnectionError as err: msg = "Client connection error to %s with [PUT] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.ConnectTimeout as err: msg = "Client connection timeout to %s with [PUT] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.RetryError as err: msg = "Client retry error to %s with [PUT] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.ReadTimeout as err: msg = "Client read timeout to %s with [PUT] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) return self.write_ok def write(self, data): """Write wrapper (noop). Actual stream is initiated by the write connection. Parameters ---------- data : object Data to write. """ pass def write_close(self): """File-handle close wrapper (noop). """ try: self.write_session.close() self.executor.shutdown(wait=False) self.source.breakiter() self.source = None self.executor = None self.write_session = None except: pass @property def read_ok(self): """ Are we connected for reads? """ return bool(self.read_response) def connect_read(self, device_uid=None, pragma=None): """Initialize a streaming read/write HTTP response. Manually connects the underlying file-handle. In the event of a network disconnection, use to manually reinitiate an HTTP session. """ header_device_uid = device_uid or self.device_uid headers = {'Device-Uid': header_device_uid, 'Accept': BROKER_SBP_TYPE, 'Pragma': pragma} if not pragma: del headers['Pragma'] try: self.read_response = self.read_session.get(self.url, stream=True, headers=headers, timeout=self.timeout) except requests.exceptions.ConnectionError as err: msg = "Client connection error to %s with [GET] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.ConnectTimeout as err: msg = "Client connection timeout to %s with [GET] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.RetryError as err: msg = "Client retry error to %s with [GET] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) except requests.exceptions.ReadTimeout as err: msg = "Client read timeout to %s with [GET] headers %s: msg=%s" \ % (self.url, headers, err) warnings.warn(msg) return self.read_ok def read(self, size): """Read wrapper. If the client connection is closed or some other exception is thrown, raises an IOError. Parameters ---------- size : int Size to read (in bytes).
Returns ---------- bytearray, or None """ if self.read_response is None or not self.device_uid: raise ValueError("Invalid/insufficient HTTP request parameters!") elif not self.read_ok or self.read_response.raw.closed: raise IOError("HTTP read closed?!") try: return self.read_response.raw.read(size) except: raise IOError("HTTP read error!") def read_close(self): """File-handle close wrapper (noop). """ try: self.read_response.close() self.read_response = None except: pass
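# HTTPDriver above keeps a streaming GET open and serves read(size) straight from the
# underlying urllib3 response object. The core of that pattern, reduced to plain
# requests with a placeholder endpoint:
import requests

resp = requests.get('https://example.org/stream', stream=True, timeout=(10, 30))
chunk = resp.raw.read(4096)   # low-level read, the same call HTTPDriver.read() wraps
resp.close()                  # mirrors read_close()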
import us import logging import requests import datetime import lxml.html import itertools import urllib.parse as urlparse from nameparser import HumanName from requests.adapters import HTTPAdapter from requests_futures.sessions import FuturesSession from concurrent.futures import as_completed session = FuturesSession(max_workers=10) session.mount('https://classic.nga.org', HTTPAdapter(max_retries=15)) KNOWN_EXCEPTIONS = { 'https://classic.nga.org/cms/sam-brownback': [{'start': '2011-01-10', 'end': '2015-01-12'}, {'start': '2015-01-12', 'end': '2018-01-31'}] } def parse_date(text): if not text: return None return datetime.datetime.strptime(text, '%b %d, %Y').date().isoformat() def parse_term(text): start, end = [parse_date(x.strip()) for x in text.split('-')] return {'start': start, 'end': end}
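# The module-level session above mounts its retrying adapter only for the
# classic.nga.org prefix, so requests to other hosts use the default adapter. A sketch
# of fanning out requests on such a session and consuming them as they finish; the URL
# list is illustrative.
from concurrent.futures import as_completed
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession

session = FuturesSession(max_workers=10)
session.mount('https://classic.nga.org', HTTPAdapter(max_retries=15))

urls = ['https://classic.nga.org/cms/sam-brownback']  # placeholder list of pages
futures = {session.get(url): url for url in urls}
for future in as_completed(futures):
    response = future.result()
    print(futures[future], response.status_code)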
class CallHub: API_LIMIT = { "GENERAL": {"calls": 13, "period": 1}, "BULK_CREATE": {"calls": 1, "period": 70}, } def __init__(self, api_domain, api_key=None, rate_limit=API_LIMIT): """ Instantiates a new CallHub instance >>> callhub = CallHub("https://api-na1.callhub.io") With built-in rate limiting disabled: >>> callhub = CallHub(rate_limit=False) Args: api_domain (``str``): Domain to access API (eg: api.callhub.io, api-na1.callhub.io), this varies by account Keyword Args: api_key (``str``, optional): Optional API key. If not provided, it will attempt to use ``os.environ['CALLHUB_API_KEY']`` rate_limit (``dict``, optional): Enabled by default with settings that respect callhub's API limits. Setting this to false disables ratelimiting, or you can set your own limits by following the example below. Please don't abuse! :) >>> callhub = CallHub(rate_limit={"GENERAL": {"calls": 13, "period": 1}, >>> "BULK_CREATE": {"calls": 1, "period": 70}}) - Default limits bulk_create to 1 per 70 seconds (CallHub states their limit is every 60s but in practice a delay of 60s exactly can trip their rate limiter anyways) - Default limits all other API requests to 13 per second (CallHub support states their limit is 20/s but this plays it on the safe side, because other rate limiters seem a little sensitive) """ self.session = FuturesSession(max_workers=43) # Attempt 3 retries for failed connections adapter = requests.adapters.HTTPAdapter(max_retries=3) self.session.mount('https://', adapter) self.session.mount('http://', adapter) # Truncate final '/' off of API domain if it was provided if api_domain[-1] == "/": self.api_domain = api_domain[:-1] else: self.api_domain = api_domain if rate_limit: # Apply general rate limit to self.session.get rate_limited_get = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.get)) self.session.get = types.MethodType(rate_limited_get, self.session) # Apply general rate limit to self.session.post rate_limited_post = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.post)) self.session.post = types.MethodType(rate_limited_post, self.session) # Apply bulk rate limit to self.bulk_create self.bulk_create = sleep_and_retry(limits(**rate_limit["BULK_CREATE"])(self.bulk_create)) self.session.auth = CallHubAuth(api_key=api_key) # validate_api_key returns administrator email on success self.admin_email = self.validate_api_key() # cache for do-not-contact number/list to id mapping self.dnc_cache = {} def __repr__(self): return "<CallHub admin: {}>".format(self.admin_email) def _collect_fields(self, contacts): """ Internal Function to get all fields used in a list of contacts """ fields = set() for contact in contacts: for key in contact: fields.add(key) return fields def _assert_fields_exist(self, contacts): """ Internal function to check if fields in a list of contacts exist in CallHub account If fields do not exist, raises LookupError. """ # Note: CallHub fields are implemented funkily. They can contain capitalization but "CUSTOM_FIELD" # and "custom_field" cannot exist together in the same account. For that reason, for the purposes of API work, # fields are treated as case insensitive despite capitalization being allowed. Attempting to upload a contact # with "CUSTOM_FIELD" will match to "custom_field" in a CallHub account. 
fields_in_contacts = self._collect_fields(contacts) fields_in_callhub = self.fields() # Ensure case insensitivity and convert to set fields_in_contact = set([field.lower() for field in fields_in_contacts]) fields_in_callhub = set([field.lower() for field in fields_in_callhub.keys()]) if fields_in_contact.issubset(fields_in_callhub): return True else: raise LookupError("Attempted to upload contact (s) that contain fields that haven't been " "created in CallHub. Fields present in upload: {} Fields present in " "account: {}".format(fields_in_contact, fields_in_callhub)) def validate_api_key(self): """ Returns admin email address if API key is valid. In rare cases, may be unable to find admin email address, and returns a warning in that case. If API key invalid, raises ValueError. If the CallHub API returns unexpected information, raises RunTimeError. Returns: username (``str``): Email of administrator account """ response = self.session.get("{}/v1/agents/".format(self.api_domain)).result() if response.json().get("detail") in ['User inactive or deleted.', 'Invalid token.']: raise ValueError("Bad API Key") elif "count" in response.json(): if response.json()["count"]: return response.json()["results"][0]["owner"][0]["username"] else: return "Cannot deduce admin account. No agent accounts (not even the default account) exist." else: raise RuntimeError("CallHub API is not returning expected values, but your api_key is fine. Their API " "specifies that https://callhub-api-domain/v1/agents returns a 'count' field, but this was " "not returned. Please file an issue on GitHub for this project, if an issue for this not " "already exist.") def agent_leaderboard(self, start, end): params = {"start_date": start, "end_date": end} response = self.session.get("{}/v1/analytics/agent-leaderboard/".format(self.api_domain), params=params).result() return response.json().get("plot_data") def fields(self): """ Returns a list of fields configured in the CallHub account and their ids Returns: fields (``dict``): dictionary of fields and ids >>> {"first name": 0, "last name": 1} """ response = self.session.get('{}/v1/contacts/fields/'.format(self.api_domain)).result() return {field['name']: field["id"] for field in response.json()["results"]} def bulk_create(self, phonebook_id, contacts, country_iso): """ Leverages CallHub's bulk-upload feature to create many contacts. Supports custom fields. >>> contacts = [{'first name': 'Sumiya', 'phone number':'5555555555', 'mobile number': '5555555555'}, >>> {'first name': 'Joe', 'phone number':'5555555555', 'mobile number':'5555555555'}] >>> callhub.bulk_create(885473, contacts, 'CA') Args: phonebook_id(``int``): ID of phonebank to insert contacts into. contacts(``list``): Contacts to insert (phone number is a MANDATORY field in all contacts) country_iso(``str``): ISO 3166 two-char country code, see https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 """ # Step 1. Get all fields from CallHub account # Step 2. Check if all fields provided for contacts exist in CallHub account # Step 3. Turn list of dictionaries into a CSV file and create a column mapping for the file # Step 4. 
Upload the CSV and column mapping to CallHub contacts = [CaseInsensitiveDict(contact) for contact in contacts] if self._assert_fields_exist(contacts): # Create CSV file in memory in a way that pleases CallHub and generate column mapping csv_file, mapping = csv_and_mapping_create(contacts, self.fields()) # Upload CSV data = { 'phonebook_id': phonebook_id, 'country_choice': 'custom', 'country_ISO': country_iso, 'mapping': mapping } response = self.session.post('{}/v1/contacts/bulk_create/'.format(self.api_domain), data=data, files={'contacts_csv': csv_file}).result() if "Import in progress" in response.json().get("message", ""): return True elif 'Request was throttled' in response.json().get("detail", ""): raise RuntimeError("Bulk_create request was throttled because rate limit was exceeded.", response.json()) else: raise RuntimeError("CallHub did not report that import was successful: ", response.json()) def create_contact(self, contact): """ Creates single contact. Supports custom fields. >>> contact = {'first name': 'Sumiya', 'phone number':'5555555555', 'mobile number': '5555555555'} >>> callhub.create_contact(contact) Args: contacts(``dict``): Contacts to insert Note that country_code and phone_number are MANDATORY Returns: (``str``): ID of created contact or None if contact not created """ if self._assert_fields_exist([contact]): url = "{}/v1/contacts/".format(self.api_domain) responses, errors = self._handle_requests([{ "func": self.session.post, "func_params": {"url": url, "data": {"name": contact}}, "expected_status": 201 }]) if errors: raise RuntimeError(errors) return responses[0].json().get("id") def get_contacts(self, limit): """ Gets all contacts. Args: limit (``int``): Limit of number of contacts to get. If limit not provided, will return first 100 contacts. Returns: contact_list (``list``): List of contacts, where each contact is a dict of key value pairs. """ contacts_url = "{}/v1/contacts/".format(self.api_domain) return self._get_paged_data(contacts_url, limit) def _get_paged_data(self, url, limit=float(math.inf)): """ Internal function. Leverages _bulk_requests to aggregate paged data and return it quickly. Args: url (``str``): API endpoint to get paged data from. Keyword Args: limit (``float or int``): Limit of paged data to get. Default is infinity. Returns: paged_data (``list``) All of the paged data as a signle list of dicts, where each dict contains key value pairs that represent each individual item in a page. """ first_page = self.session.get(url).result() if first_page.status_code != 200: raise RuntimeError("Status code {} when making request to: " "{}, expected 200. 
Details: {})".format(first_page.status_code, url, first_page.text)) first_page = first_page.json() # Handle either limit of 0 or no results if first_page["count"] == 0 or limit == 0: return [] # Set limit to the smallest of either the count or the limit limit = min(first_page["count"], limit) # Calculate number of pages page_size = len(first_page["results"]) num_pages = math.ceil(limit/page_size) requests = [] for i in range(1, num_pages+1): requests.append({"func": self.session.get, "func_params": {"url": url, "params": {"page": i}}, "expected_status": 200}) responses_list, errors = self._handle_requests(requests) if errors: raise RuntimeError(errors) # Turn list of responses into aggregated data from all pages paged_data = [] for response in responses_list: paged_data += response.json()["results"] paged_data = paged_data[:limit] return paged_data def _handle_requests(self, requests_list, aggregate_json_value=None, retry=False, current_retry_count=0): """ Internal function. Executes a list of requests in batches, asynchronously. Allows fast execution of many reqs. >>> requests_list = [{"func": session.get, >>> "func_params": {"url":"https://callhub-api-domain/v1/contacts/", "params":{"page":"1"}}} >>> "expected_status": 200] >>> _bulk_request(requests_list) Args: requests_list (``list``): List of dicts that each include a request function, its parameters, and an optional expected status. These will be executed in batches. """ # Send bulk requests in batches of at most 500 batch_size = 500 requests_awaiting_response = [] responses = [] errors = [] for i, request in enumerate(requests_list): # Execute request asynchronously requests_awaiting_response.append(request["func"](**request["func_params"])) # Every time we execute batch_size requests OR we have made our last request, wait for all requests # to have received responses before continuing. This batching prevents us from having tens or hundreds of # thousands of pending requests with CallHub if i % batch_size == 0 or i == (len(requests_list)-1): for req_awaiting_response in requests_awaiting_response: response = req_awaiting_response.result() try: if requests_list[i]["expected_status"] and response.status_code != int(requests_list[i]["expected_status"]): raise RuntimeError("Status code {} when making request to: " "{}, expected {}. 
Details: {})".format(response.status_code, requests_list[i]["func_params"]["url"], requests_list[i]["expected_status"], response.text)) responses.append(response) except RuntimeError as api_except: errors.append((requests_list[i], api_except)) requests_awaiting_response = [] if errors and retry and current_retry_count < 1: failed_requests = [error[0] for error in errors] new_responses, errors = self._handle_requests(failed_requests, retry=True, current_retry_count=current_retry_count+1) responses = responses + new_responses return responses, errors def get_dnc_lists(self): """ Returns ids and names of all do-not-contact lists Returns: dnc_lists (``dict``): Dictionary of dnc lists where the key is the id and the value is the name """ dnc_lists = self._get_paged_data("{}/v1/dnc_lists/".format(self.api_domain)) return {dnc_list['url'].split("/")[-2]: dnc_list["name"] for dnc_list in dnc_lists} def pretty_format_dnc_data(self, dnc_contacts): dnc_lists = self.get_dnc_lists() dnc_phones = defaultdict(list) for dnc_contact in dnc_contacts: phone = dnc_contact["phone_number"] dnc_list_id = dnc_contact["dnc"].split("/")[-2] dnc_contact_id = dnc_contact["url"].split("/")[-2] dnc_list = {"list_id": dnc_list_id, "name": dnc_lists[dnc_list_id], "dnc_contact_id": dnc_contact_id} dnc_phones[phone].append(dnc_list) return dict(dnc_phones) def get_dnc_phones(self): """ Returns all phone numbers in all DNC lists Returns: dnc_phones (``dict``): Dictionary of all phone numbers in all dnc lists. A phone number may be associated with multiple dnc lists. Note that each phone number on each dnc list has a unique dnc_contact_id that has NOTHING to do with the contact_id of the actual contacts related to those phone numbers. Schema: >>> dnc_contacts = {"16135554432": [ >>> {"list_id": 5543, "name": "Default DNC List", "dnc_contact_id": 1234} >>> {"list_id": 8794, "name": "SMS Campaign", "dnc_contact_id": 4567} >>> ]}} """ dnc_contacts = self._get_paged_data("{}/v1/dnc_contacts/".format(self.api_domain)) return self.pretty_format_dnc_data(dnc_contacts) def add_dnc(self, phone_numbers, dnc_list_id): """ Adds phone numbers to a DNC list of choice Args: phone_numbers (``list``): Phone numbers to add to DNC dnc_list (``str``): DNC list id to add contact(s) to Returns: results (``dict``): Dict of phone numbers and DNC lists added to errors (``list``): List of errors and failures """ if not isinstance(phone_numbers, list): raise TypeError("add_dnc expects a list of phone numbers. If you intend to only add one number to the " "do-not-contact list, add a list of length 1") url = "{}/v1/dnc_contacts/".format(self.api_domain) requests = [] for number in phone_numbers: data = {"dnc": "{}/v1/dnc_lists/{}/".format(self.api_domain, dnc_list_id), 'phone_number': number} requests.append({"func": self.session.post, "func_params": {"url": url, "data":data}, "expected_status": 201}) responses, errors = self._handle_requests(requests, retry=True) dnc_records = [request.json() for request in responses] results = self.pretty_format_dnc_data(dnc_records) return results, errors def remove_dnc(self, numbers, dnc_list=None): """ Removes phone numbers from do-not-contact list. CallHub's api does not support this, instead it only supports removing phone numbers by their internal do not contact ID. I want to abstract away from that, but it requires building a table of phone numbers mapping to their dnc ids, which can slow this function down especially when using an account with many numbers already marked do-not-contact. 
This function takes advantage of caching to get around this, and a CallHub instance will have a cache of numbers and dnc lists -> dnc_contact ids available for use. This cache is refreshed if a number is requested to be removed from the DNC list that does not appear in the cache. Args: phone_numbers (``list``): Phone numbers to remove from DNC Keyword Args: dnc_list (``str``, optional): DNC list id to remove numbers from. If not specified, will remove number from all dnc lists. Returns: errors (``list``): List of errors """ # Check if we need to refresh DNC phone numbers cache if not set(numbers).issubset(set(self.dnc_cache.keys())): self.dnc_cache = self.get_dnc_phones() dnc_ids_to_purge = [] for number in numbers: for dnc_entry in self.dnc_cache[number]: if dnc_list and (dnc_entry["list_id"] == dnc_list): dnc_ids_to_purge.append(dnc_entry["dnc_contact_id"]) elif not dnc_list: dnc_ids_to_purge.append(dnc_entry["dnc_contact_id"]) url = "{}/v1/dnc_contacts/{}/" requests = [] for dnc_id in dnc_ids_to_purge: requests.append({"func": self.session.delete, "func_params": {"url": url.format(self.api_domain, dnc_id)}, "expected_status": 204}) responses, errors = self._handle_requests(requests) return errors def create_dnc_list(self, name): """ Creates a new DNC list Args: name (``str``): Name to assign to DNC list Returns: id (``str``): ID of created dnc list """ url = "{}/v1/dnc_lists/".format(self.api_domain) responses, errors = self._handle_requests([{ "func": self.session.post, "func_params": {"url": url, "data": {"name": name}}, "expected_status": 201 }]) if errors: raise RuntimeError(errors) return responses[0].json()["url"].split("/")[-2] def remove_dnc_list(self, id): """ Deletes an existing DNC list Args: id (``str``): ID of DNC list to delete """ url = "{}/v1/dnc_lists/{}/" responses, errors = self._handle_requests([{ "func": self.session.delete, "func_params": {"url": url.format(self.api_domain, id)}, "expected_status": 204 }]) if errors: raise RuntimeError(errors) def get_campaigns(self): """ Get call campaigns Returns: campaigns (``dict``): list of campaigns """ url = "{}/v1/callcenter_campaigns/".format(self.api_domain) campaigns = self._get_paged_data(url) # Extract campaign id from url for i, campaign in enumerate(campaigns): id = campaign["url"].split("/")[-2] campaigns[i]["id"] = id return campaigns def create_phonebook(self, name, description=""): """ Create a phonebook Args: name (``str``): Name of phonebook Keyword Args: description (``str``, optional): Description of phonebook Returns: id (``str``): id of phonebook """ url = "{}/v1/phonebooks/".format(self.api_domain) responses, errors = self._handle_requests([{ "func": self.session.post, "func_params": {"url": url, "data": {"name": name, "description": description}}, "expected_status": 201 }]) if errors: raise RuntimeError(errors) id = responses[0].json()["url"].split("/")[-2] return id def create_webhook(self, target, event="cc.notes"): """ Creates a webhook on a particular target Args: target (``str``): URL for CallHub to send webhook to Keyword Args: event (``str``, optional): Event which triggers webhook. 
Default: When an agent completes a call (cc.notes) Returns: id (``str``): id of created webhook """ url = "{}/v1/webhooks/".format(self.api_domain) responses, errors = self._handle_requests([{ "func": self.session.post, "func_params": {"url": url, "data": {"target": target, "event": event}}, "expected_status": 201 }]) if errors: raise RuntimeError(errors) return responses[0].json()["id"] def get_webhooks(self): """ Fetches webhooks created by a CallHub account Returns: webhooks (``dict``): list of webhooks """ url = "{}/v1/webhooks/".format(self.api_domain) webhooks = self._get_paged_data(url) return webhooks def remove_webhook(self, id): """ Deletes a webhook with a given id Args: id (``str``): id of webhook to delete """ url = "{}/v1/webhooks/{}/".format(self.api_domain, id) responses, errors = self._handle_requests([{ "func": self.session.delete, "func_params": {"url": url}, "expected_status": 204 }]) if errors: raise RuntimeError(errors) def export_campaign(self, id): """ Triggers an export from CallHub's campaign export API. Note that the returned download link only works in an authenticated USER session for the callhub account in question. There is no way to download call campaign results directly through the API, you can only trigger exports. Because of this, there is a very limited use case for this function. Args: id (``str``): id of campaign to export Returns: url (``str``): download link for campaign """ # Step 1: Request export of campaign url = "{}/v1/power_campaign/{}/export/".format(self.api_domain, id) responses, errors = self._handle_requests([{ "func": self.session.post, "func_params": {"url": url}, "expected_status": 202 }]) if errors: raise RuntimeError(errors) polling_url = responses[0].json()["polling_url"] # Step 2: Continuously check if export is complete - 5 min maximum num_attempts_made = 0 state = "PENDING" while state == "PENDING" or state == "PROGRESS": time.sleep(1) responses, errors = self._handle_requests([{ "func": self.session.get, "func_params": {"url": polling_url}, "expected_status": 200 }]) if errors: raise RuntimeError(errors) state = responses[0].json()["state"] num_attempts_made += 1 if num_attempts_made == 300: state = "TIMEOUT" if state != "SUCCESS": raise RuntimeError("CallHub reported an error trying to export the campaign. State: {}. " "Full Response: {}".format(state, responses[0].text)) if responses[0].json()["data"]["code"] != 200: raise RuntimeError("CallHub reported an error trying to export the campaign. " "Full Response: {}".format(responses[0].text)) return responses[0].json()["data"]["url"]
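# CallHub above rate-limits the shared session by wrapping the unbound
# FuturesSession.get/post functions in ratelimit's sleep_and_retry/limits decorators
# and re-binding them with types.MethodType. The same trick in isolation, assuming the
# third-party ratelimit package; the 13-calls-per-second budget mirrors the default
# above.
import types
from ratelimit import limits, sleep_and_retry
from requests_futures.sessions import FuturesSession

session = FuturesSession(max_workers=43)
rate_limited_get = sleep_and_retry(limits(calls=13, period=1)(FuturesSession.get))
session.get = types.MethodType(rate_limited_get, session)
# Each session.get(...) now blocks at submit time until the per-second budget allows it.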
import concurrent.futures import pandas as pd from datetime import datetime from functools import reduce from bs4 import BeautifulSoup, SoupStrainer from urllib3.util.retry import Retry from requests.adapters import HTTPAdapter from requests_futures.sessions import FuturesSession from website.client import exceptions logger = logging.getLogger(__name__) session = FuturesSession(max_workers=6) retries = Retry(total=3, backoff_factor=1.0, status_forcelist=[500, 502, 503, 504]) session.mount("http://", HTTPAdapter(max_retries=retries)) payloads = [] def set_credentials(username, password): try: logger.info("Setting credentials") r = session.get( "https://clairvia.texaschildrens.org/ClairviaWeb/Login.aspx" ).result() soup = BeautifulSoup(r.text, "lxml") login_form_inputs = [ ((input.get("name"), input.get("value"))) for input in soup.find_all("input")
templateData = json.load(f) for definition in templateData: currentSeriesNames.append(definition["name"]) currentSeries.append(ValueTemplate(definition["name"], definition["min"], definition["max"])) else: currentSeries.append(ValueTemplate("temperature", 10, 40)) currentSeries.append(ValueTemplate("pressure", 950, 1100)) currentSeries.append(ValueTemplate("humidity", 20, 90)) m = Measurement(unide.process.local_now(), dimensions=currentSeriesNames) a = requests.adapters.HTTPAdapter(max_retries=maxRetries) session = FuturesSession() session.mount('http://', a) session.headers = { "Content-Type": "application/json", "Authorization": authHeader } def bg_cb(sess, resp): # parse the json storing the result on the response object resp.data = resp.json() print(resp) while True: lastMeasurement = datetime.datetime.utcnow() newMetrics = dict()
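# bg_cb above parses the JSON body in the worker thread and stashes it on the response
# object. A sketch of how such a callback is typically attached, assuming the legacy
# background_callback keyword of requests-futures (pre-1.0) and a placeholder URL:
from requests_futures.sessions import FuturesSession

def bg_cb(sess, resp):
    resp.data = resp.json()  # runs in the worker thread

session = FuturesSession()
future = session.get('https://example.org/api/metrics', background_callback=bg_cb)
response = future.result()
print(response.data)  # already-parsed JSON, set by the callback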
class ResourceSyncPuSH(object):
    """
    The base class for the publisher, hub and resource. Contains
    methods for reading config files, making HTTP requests, error
    handling, etc.
    """

    def __init__(self):
        """
        Initializes the requests-futures session with the maximum
        number of workers and retries.
        """
        # max workers and retries should be configurable?
        self.session = FuturesSession(max_workers=10)
        adapter = HTTPAdapter(max_retries=3)
        self.session.mount("http://", adapter)
        self._start_response = None

        # config parameters
        self.config = {}
        self.config['log_mode'] = ""
        self.config['mimetypes'] = []
        self.config['trusted_publishers'] = []
        self.config['trusted_topics'] = []
        self.config['my_url'] = ""
        self.config['hub_url'] = ""
        self.config['topic_url'] = ""
        self.config['subscribers_file'] = ""
        self.config['server_path'] = ""

        # logging messages
        self.log_msg = {}
        self.log_msg['payload'] = ""
        self.log_msg['msg'] = []
        self.log_msg['link_header'] = ""
        self.log_msg['module'] = ""

    def get_config(self, classname=None):
        """
        Finds and reads the config file. Reads the appropriate config
        values for the classname provided. For example, if the classname
        is hub, it will read from the [hub] section in the config file.
        """
        if not classname:
            classname = self.__class__.__name__.lower()
        self.log_msg['module'] = classname

        # NOTE: more paths can be added to look for the config files.
        # The order of the files matters: values read from the first file
        # are overwritten by values in the files that follow it.
        cnf_file = []
        cnf_file.extend([
            os.path.join(os.path.dirname(__file__),
                         "../conf/resourcesync_push.ini"),
            "/etc/resourcesync_push.ini",
            "/etc/resourcesync_push/resourcesync_push.ini",
        ])

        # loading values from the configuration file; read() returns the
        # list of files it could parse, so an empty list means failure
        conf = ConfigParser.ConfigParser()
        read_files = conf.read(cnf_file)
        if not read_files:
            raise IOError("Unable to read config file")

        if classname == "hub":
            self.get_hub_config(conf)
        elif classname == "publisher":
            self.get_publisher_config(conf)
        elif classname == "subscriber":
            try:
                self.config['my_url'] = conf.get("subscriber", "url")
            except (NoSectionError, NoOptionError):
                print("The url value for subscriber is required "
                      "in the config file.")
                raise

        self.get_demo_config(conf)

    def get_demo_config(self, conf):
        """
        Reads the [demo_mode] section from the config file if the
        log mode is set to 'demo'.
        """
        try:
            self.config['log_mode'] = conf.get("general", "log_mode")
        except (NoSectionError, NoOptionError):
            pass

        if self.config['log_mode'] != "demo":
            return

        try:
            self.config['demo_hub_url'] = conf.get("demo_mode", "hub_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a hub_url in the "
                  "[demo_mode] section")
            raise

        try:
            self.config['demo_topic_url'] = conf.get("demo_mode",
                                                     "topic_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a topic_url in the "
                  "[demo_mode] section")
            raise
        return

    def get_hub_config(self, conf):
        """
        Reads the [hub] section from the config file.
        """
        try:
            self.config['mimetypes'] = conf.get("hub", "mimetypes")
        except (NoSectionError, NoOptionError):
            # resourcesync hub by default
            self.config['mimetypes'] = "application/xml"

        try:
            self.config['trusted_publishers'] = conf.get("hub",
                                                         "trusted_publishers")
        except (NoSectionError, NoOptionError):
            # will allow any publisher
            self.config['trusted_publishers'] = []

        try:
            self.config['trusted_topics'] = conf.get("hub", "trusted_topics")
        except (NoSectionError, NoOptionError):
            # will accept any topic
            self.config['trusted_topics'] = []

        try:
            self.config['my_url'] = conf.get("hub", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for hub is required in the config file.")
            raise

        self.config['subscribers_file'] = os.path.join(
            os.path.dirname(__file__), "../db/subscriptions.pk"
        )
        try:
            self.config['subscribers_file'] = conf.get("hub",
                                                       "subscribers_file")
        except (NoSectionError, NoOptionError):
            pass

        if not os.path.isfile(self.config['subscribers_file']):
            open(self.config['subscribers_file'], 'a').close()
        return

    def get_publisher_config(self, conf):
        """
        Reads the [publisher] section in the config file.
        """
        try:
            self.config['my_url'] = conf.get("publisher", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for publisher is required "
                  "in the config file.")
            raise

        try:
            self.config['server_path'] = conf.get("publisher", "server_path")
        except (NoSectionError, NoOptionError):
            pass

        try:
            self.config['hub_url'] = conf.get("publisher", "hub_url")
        except (NoSectionError, NoOptionError):
            print("The hub_url value for publisher is required "
                  "in the config file.")
            raise

        try:
            self.config['topic_url'] = conf.get("publisher", "topic_url")
        except (NoSectionError, NoOptionError):
            print("The topic_url value for publisher is required "
                  "in the config file.")
            raise

    def send(self, url, method='POST', data=None, callback=None,
             headers=None):
        """
        Performs HTTP POST, GET and HEAD requests. Uses requests-futures
        to make (threaded) async requests.
        """
        if method == 'POST':
            return self.session.post(url,
                                     data=data,
                                     background_callback=callback,
                                     headers=headers)
        elif method == 'GET':
            return self.session.get(url, headers=headers)
        elif method == 'HEAD':
            return self.session.head(url, headers=headers)
        else:
            return

    def respond(self, code=200, msg="OK", headers=None):
        """
        Sends the appropriate http status code with an error message.
        """
        print("HTTP %s: %s" % (code, msg))
        if not headers:
            headers = []
        if str(code) != "204":
            headers.append(("Content-Type", "text/html"))
        code = str(code) + " " + HTTP_STATUS_CODE[code]
        self._start_response(code, headers)
        return [msg]

    @staticmethod
    def get_topic_hub_url(link_header):
        """
        Uses the parse_header_links method in requests to parse link
        headers and return the topic and hub urls.
        """
        links = parse_header_links(link_header)
        topic = ""
        hub_url = ""
        for link in links:
            if link.get('rel') == 'self':
                topic = link.get('url')
            elif link.get('rel') == 'hub':
                hub_url = link.get('url')
        return (topic, hub_url)

    def make_link_header(self, hub_url=None, topic_url=None):
        """
        Constructs the resourcesync link header.
        """
        if not hub_url or not topic_url:
            return self.respond(code=400,
                                msg="hub and topic urls are not set "
                                    "in the config file.")
        link_header = []
        link_header.extend(["<", topic_url, ">;rel=", "self", ","])
        link_header.extend([" <", hub_url, ">;rel=", "hub"])
        return "".join(link_header)

    def log(self):
        """
        Log handler. Will send the log info as json to the demo hub
        if log_mode value is set to demo in the config file.
""" if self.config['log_mode'] == 'demo': headers = {} headers['Link'] = self.make_link_header( hub_url=self.config['demo_hub_url'], topic_url=self.config['demo_topic_url'] ) self.send(self.config['demo_hub_url'], data=json.dumps(self.log_msg), headers=headers) else: print(self.log_msg)
import os, shutil, time, concurrent.futures, ctypes
import requests
from requests_futures.sessions import FuturesSession
from bs4 import BeautifulSoup

os.system('title nHentai Downloader v1.0 By NekoChan')

session = FuturesSession()
session.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))

kernel32 = ctypes.windll.kernel32

while True:
    kernel32.SetConsoleMode(
        kernel32.GetStdHandle(-10),
        (0x4 | 0x80 | 0x20 | 0x2 | 0x10 | 0x1 | 0x40 | 0x100))
    bookId = input('Please enter ID: ')
    nBook = requests.get(f'https://nhentai.net/g/{bookId}/')
    if nBook.status_code == 200:
        kernel32.SetConsoleMode(
            kernel32.GetStdHandle(-10),
            (0x4 | 0x80 | 0x20 | 0x2 | 0x10 | 0x1 | 0x00 | 0x100))
        begin = time.time()
        html = BeautifulSoup(nBook.text, 'html.parser')
        coverImgHtml = html.select('#cover > a > img')[0].get('data-src')
        imgType = coverImgHtml.split('.')[-1]
        galleriesId = coverImgHtml.split('/')[-2]
        pages = int(
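# --- Hedged sketch (not the original script's continuation) -----------------
# One generic way page downloads could be fanned out over a mounted
# FuturesSession and written to disk. The URLs, folder name, worker count and
# timeout are illustrative assumptions only.
import os

from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession

dl_session = FuturesSession(max_workers=8)
dl_session.mount('https://', HTTPAdapter(max_retries=3))

# Placeholder URLs; a real downloader would build these from the parsed page
# count and image type.
image_urls = ['https://example.com/gallery/{}.jpg'.format(n)
              for n in range(1, 4)]

os.makedirs('downloads', exist_ok=True)
futures = [(url, dl_session.get(url, timeout=20)) for url in image_urls]
for url, future in futures:
    resp = future.result()
    if resp.status_code == 200:
        filename = os.path.join('downloads', url.rsplit('/', 1)[-1])
        with open(filename, 'wb') as fh:
            fh.write(resp.content)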
def retrieve(self, catalog, *, dry_run=False, media_type=''):
    if not dry_run:
        distributions = Distribution.objects.filter(
            division_id=catalog.division_id, http_status_code__isnull=True)

        if media_type:
            distributions = distributions.filter(mediaType=media_type)

        if not distributions.exists():
            return

        # Collect the distribution-response pairs.
        def callback(distribution, response):
            results.append([distribution, response])

        # Create a closure.
        def factory(distribution):
            return lambda session, response: callback(distribution, response)

        # @see http://docs.python-requests.org/en/latest/api/#requests.adapters.HTTPAdapter
        # @see https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py
        session = FuturesSession()
        # Avoids "Connection pool is full, discarding connection" warnings.
        adapter_kwargs = {'pool_maxsize': 10}
        session.mount('https://', requests.adapters.HTTPAdapter(**adapter_kwargs))
        session.mount('http://', requests.adapters.HTTPAdapter(**adapter_kwargs))

        # @see https://djangosnippets.org/snippets/1949/
        pk = 0
        last_pk = distributions.order_by('-pk')[0].pk
        distributions = distributions.order_by('pk')
        while pk < last_pk:
            # @see https://github.com/ross/requests-futures/issues/18
            # @see https://github.com/ross/requests-futures/issues/5
            futures = []
            results = []

            # If an exception occurs, we lose progress on at most 100 requests.
            for distribution in distributions.filter(pk__gt=pk)[:100]:
                pk = distribution.pk

                # @see http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
                # @see http://stackoverflow.com/a/845595/244258
                futures.append(
                    session.get(
                        quote(distribution.accessURL, safe="%/:=&?~#+!$,;'@()*[]"),
                        stream=True,
                        verify=False,
                        background_callback=factory(distribution)))

            for future in futures:
                try:
                    future.result()
                except (requests.exceptions.ConnectionError,
                        requests.exceptions.InvalidSchema,
                        requests.exceptions.InvalidURL,
                        requests.exceptions.MissingSchema,
                        requests.exceptions.ReadTimeout,
                        requests.exceptions.SSLError,
                        requests.exceptions.TooManyRedirects,
                        requests.packages.urllib3.exceptions.ProtocolError):
                    self.exception('')

            for distribution, response in results:
                status_code = response.status_code
                charset = ''

                content_length = response.headers.get('content-length')
                if content_length:
                    content_length = int(content_length)

                # @see https://github.com/kennethreitz/requests/blob/b137472936cbe6a6acabab538c1d05ed4c7da638/requests/utils.py#L308
                content_type = response.headers.get('content-type', '')
                if content_type:
                    content_type, params = cgi.parse_header(content_type)
                    if 'charset' in params:
                        charset = params['charset'].strip("'\"")

                distribution.http_headers = dict(response.headers)
                distribution.http_status_code = status_code
                distribution.http_content_length = content_length
                distribution.http_content_type = content_type
                distribution.http_charset = charset
                distribution.save()

                self.debug('{} {} {}'.format(
                    status_code, number_to_human_size(content_length), content_type))

                response.close()

    self.info('{} done'.format(catalog))
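# --- Hedged sketch (illustrative, not the original code) --------------------
# Isolates the closure/"factory" trick used above: each background callback
# captures the object that produced the request, so responses can be matched
# back to it. The Item class and test URLs are hypothetical, and the sketch
# assumes the same older requests-futures API that still supports the
# `background_callback` keyword.
from requests_futures.sessions import FuturesSession


class Item(object):
    def __init__(self, url):
        self.url = url


items = [Item('https://httpbin.org/status/200'),
         Item('https://httpbin.org/status/404')]
results = []


def factory(item):
    # Bind the current item into the callback's scope.
    return lambda session, response: results.append((item, response))


demo_session = FuturesSession()
futures = [demo_session.get(item.url, background_callback=factory(item))
           for item in items]

for future in futures:
    future.result()   # re-raises any request exception

for item, response in results:
    print(item.url, response.status_code)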