import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


def prepareRequest(maxRetries=100):
    # Build a Session whose HTTPS requests are retried automatically
    s = requests.Session()
    retries = Retry(total=maxRetries)
    s.mount('https://', HTTPAdapter(max_retries=retries))
    return s
def get_session(self, *, state=None, redirect_uri=None, load_token=False,
                scopes=None):
    """ Create a requests Session object

    :param str state: session-state identifier to rebuild OAuth session
     (CSRF protection)
    :param str redirect_uri: callback URL specified in previous requests
    :param list(str) scopes: list of scopes we require access to
    :param bool load_token: load and ensure token is present
    :return: A ready to use requests session, or a rebuilt in-flow session
    :rtype: OAuth2Session
    """
    redirect_uri = redirect_uri or self.oauth_redirect_url

    client_id, _ = self.auth

    if self.auth_flow_type == 'authorization':
        oauth_client = WebApplicationClient(client_id=client_id)
    elif self.auth_flow_type == 'credentials':
        oauth_client = BackendApplicationClient(client_id=client_id)
    else:
        raise ValueError(
            '"auth_flow_type" must be either "authorization" or "credentials"')

    requested_scopes = scopes or self.scopes

    if load_token:
        # gets a fresh token from the store
        token = self.token_backend.get_token()
        if token is None:
            raise RuntimeError('No auth token found. Authentication Flow needed')

        oauth_client.token = token
        if self.auth_flow_type == 'authorization':
            requested_scopes = None  # the scopes are already in the token (Not if type is backend)
        session = OAuth2Session(client_id=client_id,
                                client=oauth_client,
                                token=token,
                                scope=requested_scopes)
    else:
        session = OAuth2Session(client_id=client_id,
                                client=oauth_client,
                                state=state,
                                redirect_uri=redirect_uri,
                                scope=requested_scopes)

    session.proxies = self.proxy

    if self.request_retries:
        retry = Retry(total=self.request_retries,
                      read=self.request_retries,
                      connect=self.request_retries,
                      backoff_factor=RETRIES_BACKOFF_FACTOR,
                      status_forcelist=RETRIES_STATUS_LIST)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

    return session
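# Usage sketch (illustrative, not from the source): assuming `connection` is an
# instance of the surrounding class with a token already saved in its
# token_backend, get_session() returns an OAuth2Session ready for API calls.
session = connection.get_session(load_token=True)  # raises RuntimeError if no stored token
response = session.get('https://graph.microsoft.com/v1.0/me')  # placeholder endpoint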
'''def get_ruian_au_feature_geometry_from_wfs(gml_id):
    url = 'https://services.cuzk.cz/wfs/inspire-au-wfs.asp?service=WFS&request=GetFeature&typeName=au:AdministrativeUnit&maxFeatures=1&featureID=%s&version=2.0.0' % gml_id
    r = requests.get(url, stream=False)
    if r.status_code == 200:
        tree = etree.parse(BytesIO(r.content))
        root = tree.getroot()
        geom = root.find('.//{http://www.opengis.net/gml/3.2}MultiSurface')
        geom_ogr = ogr.CreateGeometryFromGML(etree.tostring(geom).decode())
        return geom_ogr.ExportToWkt()
    else:
        return 'WFS does not work'
'''

# initialize a Session object, used when downloading
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
s.mount('http://', HTTPAdapter(max_retries=retries))

# dictionary of parameters that may be substituted into dataset URLs
replacement_dictionary = {
    "[posledni_den_mesice]": (datetime.datetime.today().replace(day=1)
                              - datetime.timedelta(days=1)).strftime('%Y%m%d'),
    "[lpis_cz__posledni_aktualizace]": lpis_cz__posledni_aktualizace().strftime('%Y%m%d'),
    "[vcera]": (datetime.datetime.today().replace(day=1)
                - datetime.timedelta(days=1)).strftime('%Y%m%d')
}

# data structures; mainly needed for creating the tables in Postgres
json_feature_structure = [{
    "name": "id",
def __init__(
        self,
        host=None,
        port=None,
        address=None,
        token=None,
        index=None,
        hostname=None,
        source=None,
        sourcetype='text',
        verify=True,
        timeout=60,
        sleep_interval=2.0,
        queue_size=0,
        debug=False,
        retry_count=20,
        retry_backoff=2.0,
        run_once=False):
    """__init__

    Initialize the SplunkPublisher

    :param host: Splunk fqdn
    :param port: Splunk HEC Port 8088
    :param address: Splunk fqdn:8088 - overrides host and port
    :param token: Pre-existing Splunk token
    :param index: Splunk index
    :param hostname: Splunk address <host:port>
    :param source: source for log records
    :param sourcetype: json
    :param verify: verify using certs
    :param timeout: HTTP request timeout in seconds
    :param sleep_interval: Flush the queue of logs interval in seconds
    :param queue_size: Queue this number of logs before dropping
                       new logs; 0 is an infinite number of messages
    :param debug: debug the publisher
    :param retry_count: number of publish retries per log record
    :param retry_backoff: cooldown timer in seconds
    :param run_once: test flag for running this just one time
    """
    global instances
    instances.append(self)
    logging.Handler.__init__(self)

    self.host = host
    if self.host is None:
        self.host = SPLUNK_HOST
    self.port = port
    if self.port is None:
        self.port = SPLUNK_PORT
    if address:
        address_split = address.split(':')
        self.host = address_split[0]
        self.port = int(address_split[1])
    self.token = token
    if self.token is None:
        self.token = SPLUNK_TOKEN
    self.index = index
    if self.index is None:
        self.index = SPLUNK_INDEX
    self.source = source
    if self.source is None:
        self.source = SPLUNK_SOURCE
    self.sourcetype = sourcetype
    if self.sourcetype is None:
        self.sourcetype = SPLUNK_SOURCETYPE
    self.verify = verify
    if self.verify is None:
        self.verify = SPLUNK_VERIFY
    self.timeout = timeout
    if self.timeout is None:
        self.timeout = SPLUNK_TIMEOUT
    self.sleep_interval = sleep_interval
    if self.sleep_interval is None:
        self.sleep_interval = SPLUNK_SLEEP_INTERVAL
    self.retry_count = retry_count
    if self.retry_count is None:
        self.retry_count = SPLUNK_RETRY_COUNT
    self.retry_backoff = retry_backoff
    if self.retry_backoff is None:
        self.retry_backoff = SPLUNK_RETRY_BACKOFF
    self.queue_size = queue_size
    if self.queue_size is None:
        self.queue_size = SPLUNK_QUEUE_SIZE

    self.log_payload = ''
    self.timer = None
    self.tid = None
    self.manager = multiprocessing.Manager()
    self.queue = self.manager.Queue(maxsize=self.queue_size)
    self.session = requests.Session()
    self.shutdown_event = multiprocessing.Event()
    self.shutdown_ack = multiprocessing.Event()
    self.already_done = multiprocessing.Event()
    self.testing = False
    self.shutdown_now = False
    self.run_once = run_once

    self.debug_count = 0
    self.debug = debug
    if SPLUNK_DEBUG:
        self.debug = True
        self.debug_log('starting debug mode')

    if hostname is None:
        self.hostname = socket.gethostname()
    else:
        self.hostname = hostname

    self.debug_log('preparing to override loggers')

    # prevent infinite recursion by silencing requests and urllib3 loggers
    logging.getLogger('requests').propagate = False
    logging.getLogger('urllib3').propagate = False

    # and do the same for ourselves
    logging.getLogger(__name__).propagate = False

    # disable all warnings from urllib3 package
    if not self.verify:
        requests.packages.urllib3.disable_warnings()

    # Set up automatic retry with back-off
    self.debug_log('preparing to create a Requests session')
    retry = Retry(
        total=self.retry_count,
        backoff_factor=self.retry_backoff,
        method_whitelist=False,  # Retry for any HTTP verb
        status_forcelist=[500, 502, 503, 504])
    self.session.mount('https://', HTTPAdapter(max_retries=retry))

    self.start_worker_thread(
        sleep_interval=self.sleep_interval)

    self.debug_log((
        'READY init - sleep_interval={}').format(
            self.sleep_interval))
def generateResponse(message):
    try:
        message = message.encode('utf-8')
        inputTokens = tokenize(message)
        print(inputTokens)
        perfectResponse = None
        if len(inputTokens) < 5:
            perfectResponse = attemptPerfectResponse(inputTokens)
        if perfectResponse:
            print('Found perf')
            return perfectResponse

        s = requests.Session()
        # Taken from datashamen's solution at http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Default max retries led to pooling issues
        retries = Retry(total=5, backoff_factor=0.1,
                        status_forcelist=[500, 502, 503, 504])
        s.mount('http://', HTTPAdapter(max_retries=retries))

        print('Could not find perf')
        responseDistribution = {}
        # Adding extra func
        inputSize = len(inputTokens)
        # print('Printing potential responses:')

        # Iterate through each word in the question
        for inputToken in inputTokens:
            print(inputToken)
            # Find every response associated with a particular word in the question
            tokenResponses = []
            url = 'http://localhost/WordFrequencyLookup/GetResponses.php?word=' + inputToken
            data = get_remote(url)
            if data is not None and data != '':
                try:
                    data = json.loads(data.decode('utf-8'))
                    for index in range(0, len(data)):
                        response = data[index]['wordResponse']
                        weight = float(data[index]['weight'])
                        wordId = int(data[index]['wordId'])
                        previousSubSize = int(data[index]['previousSubSize'])
                        responseGroupId = int(data[index]['responseGroupId'])
                        wordResponse = WordResponse.WordResponse(
                            response, weight, wordId,
                            previousSubSize, responseGroupId)
                        tokenResponses.append(wordResponse)

                    # Iterate through every response associated with a particular word in the question
                    for response in tokenResponses:
                        # Find the number of words in a particular response
                        responseSize = len(tokenize(response.nextSubtitle))
                        if (response.weight < 0.7
                                and response.previousSubSize < inputSize + 2
                                and response.previousSubSize > inputSize - 2) \
                                or inputSize < 5:
                            # If this response already exists in responseDistribution then we
                            # want to get the current value/weight associated with it, so it
                            # can be incremented as the response has been found again for
                            # another word
                            responseValues = responseDistribution.get(response.nextSubtitle)
                            newValue = response.weight
                            responseId = response.responseGroupId
                            isNewResponse = True
                            if responseValues is None:
                                responseValues = []
                                responseValues.append([newValue, responseId])
                                isNewResponse = False
                            else:
                                for (index, (totalWeight, respGroupId)) in enumerate(responseValues):
                                    if respGroupId == responseId:
                                        updatedValue = totalWeight + newValue
                                        responseValues[index] = (updatedValue, responseId)
                                        isNewResponse = False
                            if isNewResponse:
                                responseValues.append([newValue, responseId])
                            responseDistribution[response.nextSubtitle] = responseValues
                except (ValueError, TypeError, IndexError, KeyError) as e:
                    print(e)
                    print('No response found')

        bestResponse = None
        bestResponseValue = -1
        almostPerfectResponseList = []
        perfectResponseList = []
        for (key, weightList) in responseDistribution.items():
            for (totalWeight, groupId) in weightList:
                # check the perfect/near-perfect thresholds before updating the
                # running best, so qualifying responses are always recorded
                if totalWeight >= 0.99:
                    print('Found a perfect response!' + str(totalWeight))
                    perfectResponseList.append(key)
                elif totalWeight == 0.7:
                    almostPerfectResponseList.append(key)
                if totalWeight >= bestResponseValue:
                    bestResponseValue = totalWeight
                    bestResponse = key
        if len(perfectResponseList) != 0:
            return random.choice(perfectResponseList)
        if len(almostPerfectResponseList) != 0:
            return random.choice(almostPerfectResponseList)
        return bestResponse
    except Exception as e:
        print(e)
        return 'Error: ' + str(e)
# Open database connection
db = PyMySQL.connect(host="10.0.62.222",
                     user="******",
                     password="******",
                     db="CampusData")

# prepare a cursor object using cursor() method
cursor = db.cursor()
cursor.execute(
    'CREATE TABLE IF NOT EXISTS occupancy(anonID real, timeEpoch real, area varchar(10), building varchar(10), floor varchar(10), ap varchar(10), str varchar(50))'
)

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Define Requests Session
session = requests.Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)

headers = {
    'Connection': 'keep-alive',
    'Authorization': 'Basic Z3Vlc3Q6SWl0Z25AMjAxOSQ=',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
    'Accept': '*/*',
    'Referer': 'https://10.1.0.10/screens/dashboard.html',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
}

response = session.get('https://10.1.0.10/screens/dashboard.html',
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

import github3
from github3 import GitHub
from github3 import login
from github3.pulls import ShortPullRequest
from github3.session import GitHubSession

from cumulusci.core.exceptions import GithubException

# Prepare request retry policy to be attached to github sessions.
# 401 is a weird status code to retry, but sometimes it happens spuriously
# and https://github.community/t5/GitHub-API-Development-and/Random-401-errors-after-using-freshly-generated-installation/m-p/22905
# suggests retrying
retries = Retry(status_forcelist=(401, 502, 503, 504), backoff_factor=0.3)
adapter = HTTPAdapter(max_retries=retries)


def get_github_api(username=None, password=None):
    """Old API that only handles logging in as a user.

    Here for backwards-compatibility during the transition.
    """
    gh = login(username, password)
    gh.session.mount("http://", adapter)
    gh.session.mount("https://", adapter)
    return gh


INSTALLATIONS = {}
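# Usage sketch (illustrative): get_github_api() returns a github3.py client
# whose session now retries spurious 401/5xx responses; the credentials below
# are placeholders.
gh = get_github_api("some-user", "some-password-or-token")
repo = gh.repository("SFDO-Tooling", "CumulusCI")  # standard github3.py call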
def __init__(self):
    self._session = requests.Session()
    retry = Retry(total=5)
    adapter = HTTPAdapter(max_retries=retry)
    self._session.mount("https://", adapter)
    self._session.mount("http://", adapter)
def _spawnsession(self):
    self.session = requests.Session()
    retries = Retry(total=5, backoff_factor=0.5,
                    status_forcelist=[502, 503, 504])
    self.session.mount('https://', HTTPAdapter(max_retries=retries))
def _request(
    self,
    method: str,
    path: str,
    params: Dict[str, JSONLike] = None,
    server: str = None,
    headers: dict = None,
    token: str = None,
) -> "requests.models.Response":
    """
    Runs any specified request (GET, POST, DELETE) against the server

    Args:
        - method (str): The type of request to be made (GET, POST, DELETE)
        - path (str): Path of the API URL
        - params (dict, optional): Parameters used for the request
        - server (str, optional): The server to make requests against,
            base API server is used if not specified
        - headers (dict, optional): Headers to pass with the request
        - token (str): an auth token. If not supplied, the
            `client.access_token` is used.

    Returns:
        - requests.models.Response: The response returned from the request

    Raises:
        - ClientError: if the client token is not in the context
            (due to not being logged in)
        - ValueError: if a method is specified outside of the accepted
            GET, POST, DELETE
        - requests.HTTPError: if a status code is returned that is
            not `200` or `401`
    """
    if server is None:
        server = self.api_server
    assert isinstance(server, str)  # mypy assert

    if token is None:
        token = self.get_auth_token()

    url = urljoin(server, path.lstrip("/")).rstrip("/")

    params = params or {}
    headers = headers or {}
    if token:
        headers["Authorization"] = "Bearer {}".format(token)
    headers["X-PREFECT-CORE-VERSION"] = str(prefect.__version__)

    session = requests.Session()
    retries = Retry(
        total=6,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504],
        method_whitelist=["DELETE", "GET", "POST"],
    )
    session.mount("https://", HTTPAdapter(max_retries=retries))
    if method == "GET":
        response = session.get(url, headers=headers, params=params, timeout=30)
    elif method == "POST":
        response = session.post(url, headers=headers, json=params, timeout=30)
    elif method == "DELETE":
        response = session.delete(url, headers=headers, timeout=30)
    else:
        raise ValueError("Invalid method: {}".format(method))

    # Check if request returned a successful status
    response.raise_for_status()

    return response
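# Usage sketch (illustrative, would live inside another method of the same
# client class); the path and params below are placeholders:
#
#     response = self._request("GET", "/some-path", params={"limit": 10})
#     data = response.json()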
def __init__(self, url: str, user: str, password: str,
             *,
             verify: bool = True,
             timeout: Optional[float] = None,
             retry: Optional[dict] = None):
    """
    Swarm client class.

    * url: ``str``
        Url of Swarm server, must include API version.

    * user: ``str``
        User name, login.

    * password: ``str``
        Password for user.

    * verify: ``bool`` (optional)
        Verify SSL (default: true).

    * timeout: ``float`` (optional)
        HTTP request timeout.

    * retry: ``dict`` (optional)
        Retry options to prevent failures if the server is restarting or
        there is a temporary network problem. Disabled by default; use
        total > 0 to enable.

        - total: ``int`` Total retries count.
        - factor: ``int`` Sleep factor between retries (default 1)
            {factor} * (2 ** ({number of total retries} - 1))
        - statuses: ``List[int]`` HTTP statuses to retry on (default [])

        Example:

        .. code-block:: python

            retry = dict(
                total=10,
                factor=1,
                statuses=[500]
            )

        With factor = 1

        ============  =============
        Retry number  Sleep
        ============  =============
        1             0.5 seconds
        2             1.0 seconds
        3             2.0 seconds
        4             4.0 seconds
        5             8.0 seconds
        6             16.0 seconds
        7             32.0 seconds
        8             1.1 minutes
        9             2.1 minutes
        10            4.3 minutes
        11            8.5 minutes
        12            17.1 minutes
        13            34.1 minutes
        14            1.1 hours
        15            2.3 hours
        16            4.6 hours
        17            9.1 hours
        18            18.2 hours
        19            36.4 hours
        20            72.8 hours
        ============  =============

    :returns: ``SwarmClient instance``
    :raises: ``SwarmError``
    """
    super().__init__()
    self.host, self.version = self._get_host_and_api_version(url)

    self.session = Session()
    self.session.auth = (user, password)
    self.timeout = timeout
    self.verify = verify

    if not retry:
        return

    self._validate_retry_argument(retry)
    adapter = HTTPAdapter(max_retries=Retry(
        total=retry['total'],
        backoff_factor=retry.get('factor', 1),
        status_forcelist=retry.get('statuses', []),
        method_whitelist=['GET', 'POST', 'PATCH'],
    ))
    self.session.mount('http://', adapter)
    self.session.mount('https://', adapter)
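# Construction sketch (illustrative): enabling retries using the dict format
# documented above. The SwarmClient class name matches the docstring; the
# server URL and credentials are placeholders.
client = SwarmClient(
    'https://swarm.example.com/api/v9',
    user='alice',
    password='secret',
    retry=dict(total=10, factor=1, statuses=[500]),
)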
try:
    from multiprocessing.pool import ThreadPool
    SUPPORTS_POOL = True
except Exception:
    SUPPORTS_POOL = False

ADDON_ID = "script.module.metadatautils"
KODI_LANGUAGE = xbmc.getLanguage(xbmc.ISO_639_1)
if not KODI_LANGUAGE:
    KODI_LANGUAGE = "en"
KODI_VERSION = int(xbmc.getInfoLabel("System.BuildVersion").split(".")[0])

# setup requests with some additional options
requests.packages.urllib3.disable_warnings()
SESSION = requests.Session()
RETRIES = Retry(total=5, backoff_factor=5, status_forcelist=[500, 502, 503, 504])
SESSION.mount('http://', HTTPAdapter(max_retries=RETRIES))
SESSION.mount('https://', HTTPAdapter(max_retries=RETRIES))

FORCE_DEBUG_LOG = False
LIMIT_EXTRAFANART = 0
try:
    ADDON = xbmcaddon.Addon(ADDON_ID)
    FORCE_DEBUG_LOG = ADDON.getSetting('debug_log') == 'true'
    LIMIT_EXTRAFANART = int(ADDON.getSetting('max_extrafanarts'))
    del ADDON
except Exception:
    pass


def log_msg(msg, loglevel=xbmc.LOGDEBUG):
from requests.packages.urllib3.util.retry import Retry
from datetime import datetime, timedelta, timezone, date
from utils import *
import json
import csv
import olefile
import tempfile
import time
from random import randint
import pandas as pd
import os
import re

retry_strategy = Retry(
    total=10,
    status_forcelist=[429, 500, 502, 503, 504],
    method_whitelist=["HEAD", "GET", "OPTIONS"],
    backoff_factor=5,
)
retry_adapter = HTTPAdapter(max_retries=retry_strategy)


def update_companies(db, config, ensure_indexes=True):
    resp = requests.get(config.get("asx_companies"))
    if ensure_indexes:
        db.companies.create_index([("asx_code", pymongo.ASCENDING)], unique=True)

    fname = "{}/companies.{}.csv".format(config.get("data_root"),
                                         datetime.now().strftime("%Y-%m-%d"))
    df = None
    n = 0
def request_session(
        url: str,
        method: str,
        payload: dict = None,
        headers: dict = None,
        username: str = settings.ONA_USERNAME,
        password: str = settings.ONA_PASSWORD,
        retries=3,
        backoff_factor=1.1,
        status_forcelist=(500, 502, 504),
):  # pylint: disable=too-many-arguments
    """
    Custom Method that takes in a URL, Method(GET / POST) and optionally
    retries, backoff_factor and status_forcelist. It creates a Request
    Session and Retry Object and mounts a HTTP Adapter to the Session and
    Sends a request to the url. It then returns the Response.

    The backoff policy is documented here:
    https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
    """  # noqa
    session = requests.Session()
    retries = Retry(total=retries,
                    read=retries,
                    connect=retries,
                    backoff_factor=backoff_factor,
                    status_forcelist=status_forcelist)
    if username is not None:
        basic_auth = (username, password)
    else:
        basic_auth = None
    adapter = HTTPAdapter(max_retries=retries)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    if method == 'GET':
        response = session.get(url, auth=basic_auth, params=payload, headers=headers)
        return response
    if method == 'POST':
        response = session.post(url, auth=basic_auth, json=payload, headers=headers)
        return response
    if method == 'PATCH':
        response = session.patch(url, auth=basic_auth, json=payload, headers=headers)
        return response
    if method == 'PUT':
        response = session.put(url, auth=basic_auth, json=payload, headers=headers)
        return response
    if method == 'DELETE':
        response = session.delete(url, auth=basic_auth, headers=headers)
        return response
    return None
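# Usage sketch (illustrative): POST a JSON payload with custom retry settings;
# the URL and payload are placeholders. Note the function returns None for
# unsupported methods, hence the guard.
response = request_session(
    'https://example.com/api/submissions',
    'POST',
    payload={'id': 42},
    retries=5,
    backoff_factor=0.5,
)
if response is not None:
    response.raise_for_status()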
def with_retry(self,
               retry: Retry = Retry(total=5, connect=5, backoff_factor=0.01)
               ) -> "RestClient":
    self.session.mount(self.server_url, HTTPAdapter(max_retries=retry))
    return self
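# Usage sketch (illustrative): with_retry() returns self, so it chains off the
# constructor; RestClient's constructor arguments are assumed here, only the
# server_url attribute and session are taken from the snippet above.
client = RestClient('https://api.example.com').with_retry(
    Retry(total=10, connect=10, backoff_factor=0.5))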
def sitemap_urls_from_robots(robots_text):
    """Return an iterator over all sitemap urls contained in the given
    robots.txt file
    """
    for line in robots_text.splitlines():
        if line.lstrip().startswith('Sitemap:'):
            yield line.split(':', 1)[1].strip()


results = set(json.load(open('data/not_in_sitemap.json')))
print(results)

session = requests.Session()
retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[503])
session.mount("https://chrome.google.com/", HTTPAdapter(max_retries=retries))


def signal_handler(signal, frame):
    print('Ctrl-C pressed; saving what we have so far')
    save()
    shutil.copy('crawled/sitemap/result.json', 'data/sitemap.json')
    sys.exit(0)


def save():
    json.dump(sorted(list(results)),
              open('crawled/sitemap/result.json', 'w'),
              indent=2, sort_keys=True)
import logging
import re
from urllib.parse import urljoin

from requests import Session
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from svtplay_dl.utils.output import formatname
from svtplay_dl.utils.parser import Options

# Used for UA spoofing in get_http_data()
FIREFOX_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.3"

retry = Retry(total=5, read=5, connect=5, backoff_factor=0.3,
              status_forcelist=(500, 502, 504))


class HTTP(Session):
    def __init__(self, config=dict(), *args, **kwargs):
        Session.__init__(self, *args, **kwargs)
        adapter = HTTPAdapter(max_retries=retry)
        self.mount("http://", adapter)
        self.mount("https://", adapter)
        self.verify = config.get("ssl_verify")
        self.proxy = config.get("proxy")
        if config.get("http_headers"):
            self.headers.update(self.split_header(config.get("http_headers")))
def get_dates(queue, start_date='2010/07/17', end_date='2023/01/07'):
    from bs4 import BeautifulSoup  # module for web scraping, install by pip install beautifulsoup4
    import requests  # for requesting html. install by pip install requests
    from requests.adapters import HTTPAdapter
    from requests.packages.urllib3.util.retry import Retry
    import re  # regular expressions for data extraction by pattern matching. installed by default.
    import pandas as pd  # for dataframes. install by pip install pandas
    from csv import reader  # for list structure. installed by default.
    from tqdm import tqdm
    import os
    import shutil
    import uuid
    import time

    start_date = start_date
    end_date = end_date

    dir = str(uuid.uuid4())
    if os.path.exists(dir):
        shutil.rmtree(dir)
    os.makedirs(dir)

    features = pd.DataFrame({'features': [
        'transactions', 'size', 'sentbyaddress', 'difficulty', 'hashrate',
        'mining_profitability', 'sentinusd', 'transactionfees',
        'median_transaction_fee', 'confirmationtime', 'transactionvalue',
        'mediantransactionvalue', 'activeaddresses', 'top100cap',
        'fee_to_reward', 'price']})
    indicators = pd.DataFrame({'indicators': ['sma', 'ema', 'wma', 'trx', 'mom',
                                              'std', 'var', 'rsi', 'roc']})
    periods = pd.DataFrame({'periods': ['3', '7', '14', '30', '90']})
    crypto = pd.DataFrame({'crypto': ['btc']})
    df = pd.concat([crypto, features, indicators, periods], axis=1)

    # for raw values
    # all kinds of fees and transaction values are in USD. divide by price USD to obtain BTC
    url_list = []      # stores generated urls
    feature_list = []  # stores feature names
    i = 0
    while (i <= 15):  # this loop generates urls for raw values
        url = 'https://bitinfocharts.com/comparison/' + df['features'][i] + '-' + 'btc' + '.html'
        feature = df['features'][i]
        if "fee" in feature:
            feature = df['features'][i] + 'USD'
        if 'value' in feature:
            feature = df['features'][i] + 'USD'
        if 'usd' in feature:
            feature = df['features'][i] + 'USD'
        url_list.append(url)
        feature_list.append(feature)
        # print(feature, ' ', url)
        i = i + 1

    # for indicators
    # all kinds of fees and transaction values are in USD. drop them or recalculate
    # them after converting the raw values to BTC
    i = 0
    # this nested while loop generates the url structure for all the indicators.
    # for other currencies change btc to CURRENCY_NAME
    while (i <= 15):
        j = 0
        while (j <= 8):
            k = 0
            while (k <= 4):
                url = ('https://bitinfocharts.com/comparison/' + df['features'][i]
                       + '-' + 'btc' + '-' + df['indicators'][j]
                       + df['periods'][k] + '.html')
                feature = df['features'][i] + df['periods'][k] + df['indicators'][j]
                if "fee" in feature:
                    feature = df['features'][i] + df['periods'][k] + df['indicators'][j] + 'USD'
                if 'value' in feature:
                    feature = df['features'][i] + df['periods'][k] + df['indicators'][j] + 'USD'
                if 'price' in feature:
                    feature = df['features'][i] + df['periods'][k] + df['indicators'][j] + 'USD'
                if 'usd' in feature:
                    feature = df['features'][i] + df['periods'][k] + df['indicators'][j] + 'USD'
                if 'fee_in_reward' in feature:
                    feature = df['features'][i] + df['periods'][k] + df['indicators'][j]
                url_list.append(url)
                feature_list.append(feature)
                # print(feature, ' ', url)
                k = k + 1
            j = j + 1
        i = i + 1

    df_feature = pd.DataFrame(feature_list, columns=['Features'])  # convert feature list to dataframe
    df_url = pd.DataFrame(url_list, columns=['URL'])  # convert url list to dataframe
    df2 = df_feature.join(df_url)  # join the feature and url dataframes
    features = pd.DataFrame(columns=df2.Features)  # change the feature list to columns
    columns = len(features.columns)  # to be used in while loop for getting data columns
    date = []  # create a date column for each feature.
               # this is necessary for aligning by dates later
    print('Building URLs ...')
    for i in tqdm(range(len(features.columns))):
        date = features.columns[i] + 'Date'
        features[date] = date

    i = 0
    print('Requesting data ... ')
    # the most important part: getting the data from the website. DON'T ABUSE IT.
    # you might be IP banned for requesting a lot
    for i in tqdm(range(columns)):
        columnNames = [features.columns[i + columns], features.columns[i]]
        url = df2.URL[i]
        session = requests.Session()
        retry = Retry(connect=10, backoff_factor=3)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        page = session.get(url)
        # page = requests.get(url, time.sleep(3), timeout=10)
        # print(page)
        soup = BeautifulSoup(page.content, 'html.parser')
        values = soup.find_all('script')[5].get_text()
        newval = values.replace('[new Date("', '')
        newval2 = newval.replace('"),', ";")
        newval3 = newval2.replace('],', ',')
        newval4 = newval3.replace('],', ']]')
        newval5 = newval4.replace('null', '0')
        x = re.findall('\\[(.+?)\\]\\]', newval5)
        df3 = pd.DataFrame(list(reader(x)))
        df_transposed = df3.transpose()
        df_transposed.columns = ['Value']
        df_new = df_transposed['Value'].str.split(';', 1, expand=True)
        df_new.columns = columnNames
        mask = ((df_new[features.columns[i + columns]] >= start_date)
                & (df_new[features.columns[i + columns]] <= end_date))
        df_new = df_new.loc[mask]
        features[features.columns[i]] = df_new[features.columns[i]]
        features[features.columns[i + columns]] = df_new[features.columns[i + columns]]
        df_new.columns = df_new.columns.str.replace('.*Date*', 'Date')
        path = dir + '/' + features.columns[i] + '.csv'
        df_new.set_index('Date', inplace=True)
        df_new.to_csv(path, sep=',', columns=[features.columns[i]])
from typing import BinaryIO

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

default_retry = Retry(total=10,
                      status_forcelist=[429, 500, 502, 503, 504],
                      method_whitelist=["HEAD", "GET"])


def http_session(session: requests.Session = None, retry: Retry = None) -> requests.Session:
    session = session or requests.Session()
    retry = retry or default_retry
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session


# Instantiate a default session. It's useful to have a common session to take
# advantage of connection pooling. Users can modify things by replacing this
# session or instantiating a new one. Note that sessions can be used as
# context managers:
#   with http_session(...) as http:
#       http.get(...)
http = http_session()
                       default_appconfig_file)) as conf:
    conf_type = None
    for line in conf:
        # look for [db] line, followed by uri
        m = re.match(r'\[([^]]+)\]', line)
        if m:
            conf_type = m.group(1)
        if conf_type == 'api' and args.EOL_API_key is None:
            m = re.match(r'eol_api_key\s*=\s*(\S+)', line)
            if m:
                args.EOL_API_key = m.group(1)

# make a single http session, which we can tweak
s = requests.Session()
retries = Retry(total=args.retries, backoff_factor=2,
                status_forcelist=[500, 502, 503, 504])
s.mount('http://', HTTPAdapter(max_retries=retries))

if args.omit_suffix_dir:
    save_dir = lambda DOid: args.output_dir
else:
    save_dir = lambda DOid: os.path.join(args.output_dir, subdir_name(DOid))

if args.DOid:
    for d in args.DOid:
        get_file_from_doID(d, s, save_dir(d), args.thumbnail_size,
                           args.EOL_API_key, args.add_percent,
                           args.force_overwrite)

if args.file:
def __init__(self, credentials, *, scopes=None,
             proxy_server=None, proxy_port=8080,
             proxy_username=None, proxy_password=None,
             requests_delay=200, raise_http_errors=True,
             request_retries=3, token_file_name=None,
             token_backend=None, tenant_id='common', **kwargs):
    """ Creates an API connection object

    :param tuple credentials: a tuple of (client_id, client_secret)
     Generate client_id and client_secret in https://apps.dev.microsoft.com
    :param list[str] scopes: list of scopes to request access to
    :param str proxy_server: the proxy server
    :param int proxy_port: the proxy port, defaults to 8080
    :param str proxy_username: the proxy username
    :param str proxy_password: the proxy password
    :param int requests_delay: number of milliseconds to wait between api
     calls. The Api will respond with 429 Too many requests if more than
     17 requests are made per second. Defaults to 200 milliseconds just
     in case more than 1 connection is making requests across multiple
     processes.
    :param bool raise_http_errors: If True Http 4xx and 5xx status codes
     will raise as exceptions
    :param int request_retries: number of retries done when the server
     responds with 5xx error codes.
    :param str token_file_name: custom token file name to be used when
     storing the OAuth token credentials.
    :param BaseTokenBackend token_backend: the token backend used to get
     and store tokens
    :param str tenant_id: use this specific tenant id, defaults to common
    :param dict kwargs: any extra params passed to Connection
    :raises ValueError: if credentials is not tuple of
     (client_id, client_secret)
    """
    if not isinstance(credentials, tuple) or len(credentials) != 2 or (
            not credentials[0] and not credentials[1]):
        raise ValueError('Provide valid auth credentials')

    self.auth = credentials
    self.scopes = scopes
    self.store_token = True

    # TODO: remove "token_file_name" in a future release
    if token_file_name is not None:
        warnings.warn('"token_file_name" will be removed in future versions.'
                      ' Please use "token_backend" instead',
                      DeprecationWarning)
    token_backend = token_backend or FileSystemTokenBackend(
        token_filename=token_file_name)
    if not isinstance(token_backend, BaseTokenBackend):
        raise ValueError(
            '"token_backend" must be an instance of a subclass of BaseTokenBackend')
    self.token_backend = token_backend
    self.session = None  # requests Oauth2Session object

    self.proxy = {}
    self.set_proxy(proxy_server, proxy_port, proxy_username, proxy_password)

    self.requests_delay = requests_delay or 0
    self._previous_request_at = None  # store previous request time
    self.raise_http_errors = raise_http_errors
    self.request_retries = request_retries

    self.naive_session = Session()  # requests Session object
    self.naive_session.proxies = self.proxy

    if self.request_retries:
        retry = Retry(total=self.request_retries,
                      read=self.request_retries,
                      connect=self.request_retries,
                      backoff_factor=RETRIES_BACKOFF_FACTOR,
                      status_forcelist=RETRIES_STATUS_LIST)
        adapter = HTTPAdapter(max_retries=retry)
        self.naive_session.mount('http://', adapter)
        self.naive_session.mount('https://', adapter)

    self._oauth2_authorize_url = 'https://login.microsoftonline.com/' \
                                 '{}/oauth2/v2.0/authorize'.format(tenant_id)
    self._oauth2_token_url = 'https://login.microsoftonline.com/' \
                             '{}/oauth2/v2.0/token'.format(tenant_id)
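# Construction sketch (illustrative, assuming the surrounding class is named
# Connection): the credentials tuple is required, everything else shown is
# optional; client id/secret and scopes below are placeholders.
connection = Connection(
    ('my-client-id', 'my-client-secret'),
    scopes=['https://graph.microsoft.com/Mail.Read'],
    request_retries=5,
    tenant_id='common',
)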
def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Notes
    -----
    The following environment variables must be set:
    - LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA certificates are
      stored if you intend to use HTTPS to communicate with the endpoint.
    - LSST_BUTLER_WEBDAV_AUTH: which authentication method to use.
      Possible values are X509 and TOKEN
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy certificate
      used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which contains the
      bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required on
      certain endpoints where requests redirection is made.
    """
    retries = Retry(total=3, backoff_factor=1,
                    status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    try:
        env_auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        raise KeyError(
            "Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
            "please use values X509 or TOKEN")

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError(
                "Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set")
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError(
            "Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    ca_bundle = None
    try:
        ca_bundle = os.environ['LSST_BUTLER_WEBDAV_CA_BUNDLE']
    except KeyError:
        log.warning(
            "Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
            "HTTPS requests will fail. If you intend to use HTTPS, please "
            "export this variable.")
    session.verify = ca_bundle

    log.debug("Session configured and ready.")
    return session
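# Usage sketch (illustrative): select bearer-token authentication through the
# environment variables documented in the docstring, then build the session.
# The paths below are placeholders.
import os

os.environ['LSST_BUTLER_WEBDAV_AUTH'] = 'TOKEN'
os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE'] = '/path/to/token'
os.environ['LSST_BUTLER_WEBDAV_CA_BUNDLE'] = '/etc/ssl/certs'
session = getHttpSession()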
def attemptPerfectResponse(inputTokens):
    perfectResponseList = []
    firstRun = True
    inputSize = len(inputTokens)

    s = requests.Session()
    # Taken from datashamen's solution at http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
    # Default max retries led to pooling issues
    retries = Retry(total=5, backoff_factor=0.1,
                    status_forcelist=[500, 502, 503, 504])
    s.mount('http://', HTTPAdapter(max_retries=retries))

    # Iterate through each word in the question
    for inputToken in inputTokens:
        print(inputToken)
        # Find every response associated with a particular word in the question
        tokenResponses = []
        url = 'http://localhost/WordFrequencyLookup/GetResponses.php?word=' + inputToken
        data = get_remote(url)
        if data is not None and data != '':
            try:
                data = json.loads(data.decode('utf-8'))
                for index in range(0, len(data)):
                    response = data[index]['wordResponse']
                    weight = float(data[index]['weight'])
                    wordId = int(data[index]['wordId'])
                    previousSubSize = int(data[index]['previousSubSize'])
                    responseGroupId = int(data[index]['responseGroupId'])
                    wordResponse = WordResponse.WordResponse(
                        response, weight, wordId,
                        previousSubSize, responseGroupId)
                    tokenResponses.append(wordResponse)

                currentResponseList = []
                for response in tokenResponses:
                    if firstRun and response.previousSubSize == inputSize:
                        perfectResponseList.append(response.nextSubtitle)
                    elif response.previousSubSize == inputSize:
                        currentResponseList.append(response.nextSubtitle)
                if firstRun:
                    firstRun = False
                else:
                    perfectResponseList = set(perfectResponseList) & set(currentResponseList)
            except (ValueError, TypeError, IndexError, KeyError) as e:
                print(e)
                print('No response found')

    if perfectResponseList:
        return ''.join(random.sample(perfectResponseList, 1))
    return None
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import requests
from bs4 import BeautifulSoup
from flask import Flask, send_from_directory, render_template, request
import csv
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from os import environ

retry_strategy = Retry(total=3,
                       status_forcelist=[429, 500, 502, 503, 504],
                       method_whitelist=["HEAD", "GET", "OPTIONS"])
adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/88.0.4324.190 Safari/537.36'
}

app = Flask(__name__)


@app.route('/')
def HomePage():
    return render_template('WebScrapping.html')
        filename,
        format,
        source=source,
        targets=targets,
        **kw,
    )


class RootFlowState(FlowState):
    """root flow state"""

    kind = "root"
    _dict_fields = ["states", "engine", "final_state", "on_error"]


http_adapter = HTTPAdapter(max_retries=Retry(
    total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504]))


class RemoteHttpHandler:
    """class for calling remote endpoints"""

    def __init__(self, url):
        self.url = url
        self.format = "json"
        self._session = requests.Session()
        self._session.mount("http://", http_adapter)
        self._session.mount("https://", http_adapter)

    def do_event(self, event):
        kwargs = {}
        kwargs["headers"] = event.headers or {}
        method = event.method or "POST"
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")

    # Create formatter and add it to handlers
    f_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    f_handler.setFormatter(f_format)

    # # Add handlers to the logger
    logger_obj.addHandler(f_handler)

    return logger_obj


logger = get_logger()

retry_strategy = Retry(
    total=10,
    status_forcelist=[429, 500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
client = requests.Session()
client.mount("https://", adapter)
credentials = json.load(open('auth.json'))
client.auth = HTTPBasicAuth(credentials['username'], credentials['password'])


def download_file(file_url, filename):
    if os.path.isfile(file_url):
        return file_url
    path, _ = os.path.split(filename)
    os.makedirs(path, exist_ok=True)
    result = requests.get(file_url)
    filename = re.sub(r"[<>?:|]", "", filename)
def __init__(self, gpu, locale="en_us"):
    self.product_ids = set([])
    self.cli_locale = locale.lower()
    self.locale = self.map_locales()
    self.session = requests.Session()
    self.gpu = gpu
    self.enabled = True
    self.auto_buy_enabled = False
    self.attempt = 0
    self.started_at = datetime.now()

    self.gpu_long_name = GPU_DISPLAY_NAMES[gpu]

    if path.exists(AUTOBUY_CONFIG_PATH):
        with open(AUTOBUY_CONFIG_PATH) as json_file:
            try:
                self.config = json.load(json_file)
            except Exception as e:
                log.error("Your `autobuy_config.json` file is not valid json.")
                raise e
            if self.has_valid_creds():
                self.nvidia_login = self.config["NVIDIA_LOGIN"]
                self.nvidia_password = self.config["NVIDIA_PASSWORD"]
                self.auto_buy_enabled = self.config["FULL_AUTOBUY"]
                self.cvv = self.config.get("CVV")
            else:
                raise InvalidAutoBuyConfigException(self.config)
    else:
        log.info("No Autobuy creds found.")

    # Disable auto_buy_enabled if the user does not provide a bool.
    if type(self.auto_buy_enabled) != bool:
        self.auto_buy_enabled = False

    adapter = TimeoutHTTPAdapter(max_retries=Retry(
        total=10,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        method_whitelist=["HEAD", "GET", "OPTIONS"],
    ))
    self.session.mount("https://", adapter)
    self.session.mount("http://", adapter)

    self.notification_handler = NotificationHandler()

    log.info("Opening Webdriver")
    self.driver = webdriver.Chrome(executable_path=binary_path,
                                   options=options,
                                   chrome_options=chrome_options)
    self.sign_in()
    selenium_utils.add_cookies_to_session_from_driver(self.driver, self.session)
    log.info("Adding driver cookies to session")

    log.info("Getting product IDs")
    self.token_data = self.get_nvidia_access_token()
    self.payment_option = self.get_payment_options()
    if not self.payment_option.get("id") or not self.cvv:
        log.error("No payment option on account or missing CVV. Disable Autobuy")
        self.auto_buy_enabled = False
    else:
        log.debug(self.payment_option)
        self.ext_ip = self.get_ext_ip()

    if not self.auto_buy_enabled:
        log.info("Closing webdriver")
        self.driver.close()

    self.get_product_ids()
    while len(self.product_ids) == 0:
        log.info(
            f"We have no product IDs for {self.gpu_long_name}, "
            f"retrying until we get a product ID")
        self.get_product_ids()
        sleep(5)
def __init__(self, info=None, request_charset='utf-8', response_charset=None):
    self._counter = 0
    self._cookies_filename = ''
    self._cookies = LWPCookieJar()
    self.url = None
    self.user_agent = USER_AGENT
    self.content = None
    self.status = None
    self.username = None
    self.token = None
    self.passkey = None
    self.info = info
    self.proxy_url = None
    self.request_charset = request_charset
    self.response_charset = response_charset

    self.needs_proxylock = False

    self.headers = dict()
    self.request_headers = None

    self.session = requests.session()
    self.session.verify = False

    # Enabling retrying on failed requests
    retries = Retry(
        total=2,
        read=2,
        connect=2,
        redirect=3,
        backoff_factor=0.1,
        status_forcelist=[429, 500, 502, 503, 504])
    self.session.mount('http://', HTTPAdapter(max_retries=retries))
    self.session.mount('https://', HTTPAdapter(max_retries=retries))
    # self.session = cfscrape.create_scraper()
    # self.scraper = cfscrape.create_scraper()
    # self.session = self.scraper.session()

    global dns_public_list
    global dns_opennic_list
    dns_public_list = get_setting("public_dns_list", unicode).replace(" ", "").split(",")
    dns_opennic_list = get_setting("opennic_dns_list", unicode).replace(" ", "").split(",")
    # socket.setdefaulttimeout(60)

    # Parsing proxy information
    proxy = {
        'enabled': get_setting("proxy_enabled", bool),
        'use_type': get_setting("proxy_use_type", int),
        'type': proxy_types[0],
        'host': get_setting("proxy_host", unicode),
        'port': get_setting("proxy_port", int),
        'login': get_setting("proxy_login", unicode),
        'password': get_setting("proxy_password", unicode),
    }

    try:
        proxy['type'] = proxy_types[get_setting("proxy_type", int)]
    except:
        pass

    if get_setting("use_public_dns", bool):
        connection.create_connection = patched_create_connection

    if get_setting("use_da_inc_proxy", bool):
        dainc_addon = xbmcaddon.Addon(id='plugin.video.da_inc')
        if dainc_addon and dainc_addon.getSetting('internal_proxy_enabled') == "true":
            self.proxy_url = "{0}://{1}:{2}".format("http", "127.0.0.1", "65222")
            if info and "internal_proxy_url" in info:
                self.proxy_url = info["internal_proxy_url"]
        self.session.proxies = {
            'http': self.proxy_url,
            'https': self.proxy_url,
        }
    elif proxy['enabled']:
        if proxy['use_type'] == 0 and info and "proxy_url" in info:
            log.debug("Setting proxy from da_inc: %s" % (info["proxy_url"]))
        elif proxy['use_type'] == 1:
            log.debug("Setting proxy with custom settings: %s" % (repr(proxy)))
            if proxy['login'] or proxy['password']:
                self.proxy_url = "{0}://{1}:{2}@{3}:{4}".format(
                    proxy['type'], proxy['login'], proxy['password'],
                    proxy['host'], proxy['port'])
            else:
                self.proxy_url = "{0}://{1}:{2}".format(
                    proxy['type'], proxy['host'], proxy['port'])
        if self.proxy_url:
            self.session.proxies = {
                'http': self.proxy_url,
                'https': self.proxy_url,
            }
def get_stock_min(resolution):
    # Retry on failure for finnhub API call limit
    retry_strategy = Retry(total=20,
                           status_forcelist=[429],
                           method_whitelist=["GET"],
                           backoff_factor=1)
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    # Connect to an existing database
    conn = db_connect()
    # Open a cursor to perform database operations
    cur = conn.cursor()

    # Setup client
    api_key = os.environ['API_KEY']

    # Stock symbol list
    symbols = pd.read_csv('/home/ubuntu/luciexie-A1/companylist.csv')
    symbols = symbols['Symbol']

    # Setup time
    # Initializing the past one year minute dataset:
    # due to the finnhub API restriction,
    # a loop through the past 12 months is needed the first time
    # one_yr = relativedelta(years=1)
    # one_month = relativedelta(months=1)
    # start = (datetime.now() - one_yr).timestamp()
    # end = start

    # To get the past one day daily data
    one_day = relativedelta(hours=24)
    end = datetime.now().timestamp()
    start = (datetime.now() - one_day).timestamp()

    # While loop through each month
    # while end < now:
    #     end = (datetime.fromtimestamp(start) + one_month).timestamp()
    try:
        for symbol in symbols[:10]:
            # Stock candles
            api_link = 'https://finnhub.io/api/v1/stock/candle?' \
                       + 'symbol={symbol}&resolution={resolution}&from={from_t}' \
                       + '&to={to_t}&token={token}'
            endpoint = api_link.format(symbol=symbol,
                                       resolution=resolution,
                                       from_t=str(int(start)),
                                       to_t=str(int(end)),
                                       token=api_key)
            response = http.get(endpoint)
            response_dict = json.loads(response.content.decode('utf-8'))

            if response_dict.get('s') == 'ok':
                # Convert to Pandas Dataframe and display datetime
                stock_candles = pd.DataFrame.from_dict(response_dict)
                stock_candles['s'] = 'ok'
                stock_candles['dt'] = [
                    datetime.fromtimestamp(t) for t in stock_candles['t']
                ]
                stock_candles.drop('t', axis=1, inplace=True)
                stock_candles['symbol'] = symbol
                # Convert UTC to EST
                stock_candles['dt'] = stock_candles['dt'].dt.tz_localize(
                    'UTC').dt.tz_convert('US/Eastern')

                # Export to csv
                buffer = StringIO()
                stock_candles.to_csv(buffer, index=False, header=False)
                buffer.seek(0)
                # Copying the data from csv
                cur.copy_from(buffer, 'stock_candles_minute', sep=',')
                conn.commit()
            else:
                print('Sorry, no valid data for ' + symbol + ' at this moment')
        # start = end
        cur.close()
        send_msg('COMPLETED')
    except Exception:
        send_msg('ALERT')
    return
#!/usr/bin/env python3

import argparse
import datetime
import logging
import os
import requests
import tempfile
import zipfile

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

zip_file_finder = "https://soa.smext.faa.gov/apra/nfdc/nasr/chart"
edition_date = None

retry_strategy = Retry(total=60, backoff_factor=1.0)
adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)


def get_download_url(edition="current"):
    global edition_date
    params = {'edition': edition}
    headers = {'Accept': 'application/json'}
    logging.info(
        "Checking {} to find the desired zip file".format(zip_file_finder))
    r = http.get(zip_file_finder, params=params, headers=headers, timeout=10)
    edition_date = datetime.datetime.strptime(
        r.json()['edition'][0]['editionDate'],
        "%m/%d/%Y").date().isoformat()