def download_histories_csv(date_str):
    filename = 'data/intraday/us.intraday.polygon.history.csv'
    request_list = _get_requests(date_str)
    # request_list = request_list[:10]
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]
    with open(filename, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')
        for cnt, res in enumerate(responses):
            if not res:
                print('The response is invalid: %s' % (res))
                continue
            if res.status_code != 200:
                continue
            js = res.json()
            if 'results' not in js:
                print('The response does not have results: %s' % (js))
                continue
            data = js['results']
            if not data:
                continue
            symbol = js['ticker']
            print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt, symbol=symbol, l=len(data)))
            out_lines = []
            for blob in data:
                epoch = int(blob['t']) // 1000
                t = datetime.datetime.fromtimestamp(epoch).astimezone(_TZ_US_EAST)
                date_str = t.strftime('%Y-%m-%d')
                time_str = t.strftime('%H:%M:%S')
                close, open_, high, low, volume = blob['c'], blob['o'], blob['h'], blob['l'], blob['v']
                out_lines.append(
                    '{date_str},{time_str},{close},{open},{high},{low},{volume},{symbol}\n'.format(
                        date_str=date_str, time_str=time_str, close=close, open=open_,
                        high=high, low=low, volume=volume, symbol=symbol))
            outfile.writelines(out_lines)
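# A minimal sketch of how _get_requests(date_str) might build the per-symbol
# request list consumed above; the symbol list, URL template, and API key
# handling below are assumptions, not shown in the original snippet.
import requests

_INTRADAY_URL_TEMPLATE = ('https://api.polygon.io/v2/aggs/ticker/{symbol}/range/'
                          '1/minute/{date}/{date}?apiKey={key}')  # hypothetical template


def _get_requests(date_str, symbols=('AAPL', 'MSFT'), api_key='YOUR_KEY'):
    # one GET request per symbol; BaseThrottler paces their execution
    return [requests.Request(method='GET',
                             url=_INTRADAY_URL_TEMPLATE.format(symbol=s, date=date_str,
                                                               key=api_key))
            for s in symbols]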
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            continue
        js = res.json()
        if 'status' not in js or (js['status'] != 'OK' and js['status'] != 'success'):
            print('The response does not have proper status: %s' % (js))
            continue
        if 'tickers' not in js:
            print('The response does not have results: %s' % (js))
            continue
        for i, ticker in enumerate(js['tickers']):
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))
            daily = ticker['day']
            close, open_, high, low, volume = daily['c'], daily['o'], daily['h'], daily['l'], daily['v']
            # The original indexed the ticker with a literal epoch value
            # ('1547787608999'); the last-updated timestamp field is assumed
            # to be 'updated' here.
            epoch = int(ticker['updated']) // 1000
            t = _TZ_US_EAST.localize(datetime.datetime.fromtimestamp(epoch))
            date_str = t.strftime('%Y-%m-%d')
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.format(
                    date_str=date_str, close=close, open=open_, high=high,
                    low=low, volume=volume, symbol=symbol))
    return rows
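# A minimal sketch of a request builder matching the snapshot-style payload
# ('status' / 'tickers' / per-ticker 'day') parsed above; the endpoint and
# API key handling below are assumptions.
import requests

def _get_snapshot_requests(api_key='YOUR_KEY'):
    url = ('https://api.polygon.io/v2/snapshot/locale/us/markets/stocks/tickers'
           '?apiKey={key}'.format(key=api_key))  # hypothetical endpoint
    return [requests.Request(method='GET', url=url)]

# rows = _run_requests_return_rows(_get_snapshot_requests())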
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.1)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]
    rows = []
    for cnt, response in enumerate(responses):
        if not response:
            print('The response is invalid: %s' % (response))
            continue
        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue
        js = response.json()
        if not js:
            print('The response is invalid: %s' % (js))
            continue
        if 'dataset' not in js:
            print('The response does not have dataset: %s' % (js))
            continue
        if 'data' not in js['dataset']:
            print('The response data does not have data: %s' % (js))
            continue
        symbol = js['dataset']['dataset_code']
        data = js['dataset']['data']
        for data_for_date in data:
            date_str = data_for_date[0]
            close, open_, high, low, volume = (data_for_date[4], data_for_date[1],
                                               data_for_date[2], data_for_date[3],
                                               data_for_date[5])
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.format(
                    date_str=date_str, close=close, open=open_, high=high,
                    low=low, volume=volume, symbol=symbol))
    return rows
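# A minimal sketch of a request builder producing the 'dataset'/'data' payload
# parsed above (Quandl-style time-series JSON); the database code, endpoint,
# and API key below are assumptions.
import requests

def _get_dataset_requests(symbols, api_key='YOUR_KEY'):
    url_template = ('https://www.quandl.com/api/v3/datasets/WIKI/{symbol}.json'
                    '?api_key={key}')  # hypothetical endpoint
    return [requests.Request(method='GET', url=url_template.format(symbol=s, key=api_key))
            for s in symbols]

# rows = _run_requests_return_rows(_get_dataset_requests(['AAPL', 'MSFT']))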
class FSEconomy(object):
    def __init__(self, local, user_key=None, service_key=None):
        requests_cache.install_cache('fse', expire_after=3600)
        self.bt = BaseThrottler(name='fse-throttler', reqs_over_time=(1, 2))
        self.bt.start()
        self.airports = common.load_airports()
        self.aircraft = common.load_aircrafts()
        self.service_key = service_key
        self.user_key = user_key
        if local:
            self.assignments = common.load_pickled_assignments()
        else:
            self.assignments = self.get_assignments()

    def get_aggregated_assignments(self, cargo=False):
        if cargo:
            self.assignments = self.assignments[self.assignments.UnitType == 'kg']
        else:
            self.assignments = self.assignments[self.assignments.UnitType == 'passengers']
        grouped = self.assignments.groupby(['FromIcao', 'ToIcao'], as_index=False)
        aggregated = grouped.aggregate(np.sum)
        return aggregated.sort_values('Pay', ascending=False)

    def send_single_request(self, path):
        query_link = self.generate_request(const.LINK + path)
        request = requests.Request(method='GET', url=query_link)
        i = 0
        while True:
            try:
                thottled_request = self.bt.submit(request)
                data = thottled_request.response.content
                if 'To many requests' in data or 'minimum delay' in data:
                    raise requests.exceptions.ConnectionError
                return data
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print 'Retrying Request'
                i += 1
                time.sleep(60)

    def send_multi_request(self, paths):
        request_queue = []
        for path in paths:
            query_link = self.generate_request(const.LINK + path)
            request_queue.append(requests.Request(method='GET', url=query_link))
        i = 0
        while True:
            try:
                thottled_requests = self.bt.multi_submit(request_queue)
                responses = [tr.response for tr in thottled_requests]
                request_queue = []
                complete_response = []
                for response in responses:
                    if 'To many requests' in response.content or 'minimum delay' in response.content:
                        request_queue.append(response.url)
                        print response.content
                    elif 'you are now in a lockout period' in response.content:
                        raise Exception(response.content)
                    else:
                        complete_response.append(response.content)
                if len(request_queue) > 0:
                    raise requests.exceptions.ConnectionError
                return complete_response
            except AttributeError:
                for request in request_queue:
                    print 'Error with request: ', request
                raise
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print 'Retrying Request'
                i += 1
                time.sleep(60)

    def get_aircrafts_by_icaos(self, icaos):
        aircraft_requests = []
        for icao in icaos:
            aircraft_requests.append('query=icao&search=aircraft&icao={}'.format(icao))
        responses = self.send_multi_request(aircraft_requests)
        all_aircraft = []
        for response in responses:
            aircraft = pd.DataFrame.from_csv(StringIO(response))
            try:
                aircraft.RentalDry = aircraft.RentalDry.astype(float)
                aircraft.RentalWet = aircraft.RentalWet.astype(float)
                all_aircraft.append(aircraft)
            except:
                print 'error updating rental info: ', response
        return all_aircraft

    def get_assignments(self):
        assignments = pd.DataFrame()
        i = 0
        assignment_requests = []
        number_at_a_time = 1000
        while i + number_at_a_time < len(self.airports):
            assignment_requests.append(
                'query=icao&search=jobsfrom&icaos={}'.format(
                    '-'.join(self.airports.icao[i:i + number_at_a_time])))
            i += number_at_a_time
        responses = self.send_multi_request(assignment_requests)
        for data in responses:
            assignments = pd.concat([assignments, pd.DataFrame.from_csv(StringIO(data))])
        response = self.send_single_request(
            'query=icao&search=jobsfrom&icaos={}'.format(
                '-'.join(self.airports.icao[i:len(self.airports) - 1])))
        assignments = pd.concat([assignments, pd.DataFrame.from_csv(StringIO(response))])
        with open('assignments', 'wb') as f:
            pickle.dump(assignments, f)
        return assignments

    def get_best_assignments(self, row):
        df = self.assignments[(self.assignments.FromIcao == row['FromIcao']) &
                              (self.assignments.ToIcao == row['ToIcao']) &
                              (self.assignments.Amount <= row['Seats'])]
        if not len(df):
            return None
        prob = LpProblem("Knapsack problem", LpMaximize)
        w_list = df.Amount.tolist()
        p_list = df.Pay.tolist()
        x_list = [LpVariable('x{}'.format(i), 0, 1, 'Integer')
                  for i in range(1, 1 + len(w_list))]
        prob += sum([x * p for x, p in zip(x_list, p_list)]), 'obj'
        prob += sum([x * w for x, w in zip(x_list, w_list)]) <= row['Seats'], 'c1'
        prob.solve()
        return df.iloc[[i for i in range(len(x_list)) if x_list[i].varValue]]

    def get_best_craft(self, icao, radius):
        print 'Searching for the best aircraft from {}'.format(icao)
        max_seats = 0
        best_aircraft = None
        near_icaos = self.get_closest_airports(icao, radius).icao
        all_aircraft = self.get_aircrafts_by_icaos(near_icaos)
        for aircraft in all_aircraft:
            if not len(aircraft):
                continue
            merged = pd.DataFrame.merge(aircraft, self.aircraft, left_on='MakeModel',
                                        right_on='Model', how='inner')
            merged = merged[(~merged.MakeModel.isin(const.IGNORED_AIRCRAFTS)) &
                            (merged.RentalWet + merged.RentalDry > 0)]
            if not len(merged):
                continue
            aircraft = merged.ix[merged.Seats.idxmax()]
            if aircraft.Seats > max_seats:
                best_aircraft = aircraft
                max_seats = aircraft.Seats
        return best_aircraft

    def get_closest_airports(self, icao, nm):
        lat = self.airports[self.airports.icao == icao].lat.iloc[0]
        nm = float(nm)
        # one degree of latitude is appr. 69 nm
        lat_min = lat - nm / 69
        lat_max = lat + nm / 69
        filtered_airports = self.airports[self.airports.lat > lat_min]
        filtered_airports = filtered_airports[filtered_airports.lat < lat_max]
        distance_vector = filtered_airports.icao.map(lambda x: self.get_distance(icao, x))
        return filtered_airports[distance_vector < nm]

    def get_distance(self, from_icao, to_icao):
        try:
            lat1, lon1 = [radians(x) for x in
                          self.airports[self.airports.icao == from_icao][['lat', 'lon']].iloc[0]]
            lat2, lon2 = [radians(x) for x in
                          self.airports[self.airports.icao == to_icao][['lat', 'lon']].iloc[0]]
        except IndexError:
            return 9999.9
        return common.get_distance(lat1, lon1, lat2, lon2)

    def get_logs(self, from_id):
        key = self.user_key or self.service_key
        data = self.send_single_request(
            'query=flightlogs&search=id&readaccesskey={}&fromid={}'.format(key, from_id))
        logs = pd.DataFrame.from_csv(StringIO(data))
        logs = logs[(logs.MakeModel != 'Airbus A321') &
                    (logs.MakeModel != 'Boeing 737-800') &
                    (logs.Type == 'flight')]
        logs['Distance'] = logs.apply(lambda x, self=self: self.get_distance(x['From'], x['To']), axis=1)
        logs = pd.merge(logs, self.aircraft, left_on='MakeModel', right_on='Model')
        logs['FlightTimeH'] = logs.apply(lambda x: int(x['FlightTime'].split(':')[0]), axis=1)
        logs['FlightTimeM'] = logs.apply(lambda x: int(x['FlightTime'].split(':')[1]), axis=1)
        logs = logs[(logs.FlightTimeH > 0) | (logs.FlightTimeM > 0)]
        logs = logs[logs.Distance > 0]
        logs['AvSpeed'] = logs.apply(
            lambda x: 60 * x['Distance'] / (60 * x['FlightTimeH'] + x['FlightTimeM']), axis=1)
        import pdb
        pdb.set_trace()

    def generate_request(self, query_link):
        if self.user_key:
            query_link += '&userkey={}'.format(self.user_key)
        elif self.service_key:
            query_link += '&servicekey={}'.format(self.service_key)
        return query_link

    def __del__(self):
        self.bt.shutdown()
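# A short usage sketch for FSEconomy; the access key below is a placeholder,
# and local=False triggers a full assignment download on construction.
if __name__ == '__main__':
    fse = FSEconomy(local=False, user_key='YOUR_USER_KEY')
    best_jobs = fse.get_aggregated_assignments(cargo=False)
    print best_jobs.head()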
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue
        js = res.json()
        req = request_list[cnt]
        m = re.search(r'stock/([^/]+)', req.url)
        if not m:
            continue
        if not m.groups():
            continue
        symbol = m.groups()[0]
        if not js:
            continue
        print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt, symbol=symbol, l=len(js)))
        prev_close = None
        for blob in js:
            keys = ['date', 'close', 'open', 'high', 'low', 'volume']
            is_blob_compromised = False
            for k in keys:
                if k not in blob:
                    print('blob: {blob} does not have all the expected keys, '
                          'missing key: {key}'.format(blob=str(blob), key=k))
                    is_blob_compromised = True
                    break
            if is_blob_compromised:
                continue
            date_str = blob['date']
            close, open_, high, low, volume = (blob['close'], blob['open'],
                                               blob['high'], blob['low'], blob['volume'])
            if volume == '0' or volume == 0 or close is None:
                close, open_, high, low = prev_close, prev_close, prev_close, prev_close
            if close is None:
                continue
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.format(
                    date_str=date_str, close=close, open=open_, high=high,
                    low=low, volume=volume, symbol=symbol))
            prev_close = close
    return rows
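# A minimal sketch of a request builder whose URLs match the r'stock/([^/]+)'
# pattern used above to recover the symbol (an IEX-style chart endpoint);
# the base URL and date range below are assumptions.
import requests

def _get_chart_requests(symbols, chart_range='1m'):
    url_template = 'https://api.iextrading.com/1.0/stock/{symbol}/chart/{rng}'  # hypothetical
    return [requests.Request(method='GET',
                             url=url_template.format(symbol=s, rng=chart_range))
            for s in symbols]

# rows = _run_requests_return_rows(_get_chart_requests(['AAPL', 'MSFT']))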
class LogsClient:
    def __init__(self, logs_cache_dir):
        self.logs_cache_dir = logs_cache_dir + '/'
        self.throttler = BaseThrottler(name='base-throttler', delay=0.2)
        self.throttler.start()

    def fetchLogs(self, log_metadata):
        log_metadata_lookup = {log[u'id']: log for log in log_metadata}
        # fetch cached log data
        if not os.path.isdir(self.logs_cache_dir):
            os.makedirs(self.logs_cache_dir)
        cache_filepaths = {
            id: self.getLogFilepath(id)
            for id in log_metadata_lookup
        }
        existing_logs = {
            id: loadJson(filepath)
            for id, filepath in cache_filepaths.items()
            if os.path.isfile(filepath)
        }
        updated_log_ids = [
            id for id, log in existing_logs.items()
            if log[u'info'][u'date'] < log_metadata_lookup[id][u'date']
        ]
        # fetch any new uncached logs or logs that need to be updated
        fetched_log_ids = [
            id for id in log_metadata_lookup
            if (id not in existing_logs or id in updated_log_ids)
        ]
        fetched_logs = {}
        if len(fetched_log_ids) > 0:
            reqs = [
                requests.Request('GET', 'http://logs.tf/api/v1/log/' + str(id))
                for id in fetched_log_ids
            ]
            throttled_requests = self.throttler.multi_submit(reqs)
            fetched_logs = {
                getLogIdFromUrl(tr.request.url): tr.response.json()
                for tr in throttled_requests
            }
        # update cache
        for id, log_json in fetched_logs.items():
            saveJson(self.getLogFilepath(id), log_json)
        # return merged cached & new results, preferring the new results if any conflicts
        return {**existing_logs, **fetched_logs}

    def getUploaderLogMetadata(self, uploaderId):
        return self.throttler.submit(
            requests.Request(
                'GET',
                'http://logs.tf/api/v1/log?uploader=' + uploaderId + '&limit=10000'
            )).response.json()

    def getLogFilepath(self, id):
        return self.logs_cache_dir + str(id) + '.json'

    def close(self):
        self.throttler.shutdown()
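# A short usage sketch for LogsClient, assuming the uploader metadata response
# exposes the log list under 'logs'; the uploader id and cache directory below
# are placeholders.
if __name__ == '__main__':
    client = LogsClient('logs_cache')
    metadata = client.getUploaderLogMetadata('76561198000000000')  # hypothetical id
    logs = client.fetchLogs(metadata[u'logs'])
    print('fetched {} logs'.format(len(logs)))
    client.close()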
class ThrottledSession():
    def __init__(self, config):
        super().__init__()
        self.session = self.createBaseSession()
        self.config = config
        self._delay = config.get('requestDelay')
        self.session.headers.update({'User-Agent': 'Niantic App'})
        self.session.verify = True
        self.throttle = BaseThrottler(name='mainThrottle', session=self.session,
                                      delay=self._delay)
        self.throttle.start()
        self.orig = None

    def getThrottle(self):
        return self.throttle

    def createBaseSession(self):
        sess = session()
        sess.headers = {
            'User-Agent': 'Niantic App',
        }
        sess.verify = False
        return sess

    def updateDelay(self):
        config_delay = self.config.get('requestDelay')
        if self._delay != config_delay:
            self._delay = config_delay
            if self.throttle:
                self.throttle._delay = self._delay

    def post(self, url, **kwargs):
        self.updateDelay()
        wrapper = Request(method='POST', url=url, **kwargs)
        res = self.throttle.submit(wrapper).response
        return res

    def get(self, url, **kwargs):
        self.updateDelay()
        wrapper = Request(method='GET', url=url, **kwargs)
        res = self.throttle.submit(wrapper).response
        return res

    def makeThrottle(self):
        throttle = BaseThrottler(name='mainThrottle', session=self.session,
                                 delay=self._delay)
        return throttle

    def restart(self):
        if self.orig:
            self.throttle.shutdown()
            self.throttle = self.orig
            self.throttle.unpause()
            self.orig = None

    def pauseExec(self):
        self.orig = self.throttle
        self.orig.pause()
        self.throttle = self.makeThrottle()
        self.throttle.start()

    def stop(self):
        self.throttle.shutdown()

    def makeNew(self):
        self.throttle.shutdown()
        time.sleep(1)
        self.throttle = self.makeThrottle()
        self.throttle.start()
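# A short usage sketch for ThrottledSession, assuming a dict-like config with a
# 'requestDelay' key; the URL below is a placeholder.
config = {'requestDelay': 0.5}
throttled = ThrottledSession(config)
resp = throttled.get('https://example.com/api/status')
print(resp.status_code if resp is not None else 'no response')
throttled.stop()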
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue
        js = res.json()
        if 'status' not in js or (js['status'] != 'OK' and js['status'] != 'success'):
            print('The response does not have proper status: %s' % (js))
            continue
        keys = ['open', 'afterHours', 'high', 'low', 'volume', 'from']
        is_blob_compromised = False
        for k in keys:
            if k not in js:
                print('blob: {blob} does not have all the expected keys, '
                      'missing key: {key}'.format(blob=str(js), key=k))
                is_blob_compromised = True
                break
        if is_blob_compromised:
            continue
        symbol = js['symbol']
        close, open_, high, low, volume = (js['afterHours'], js['open'],
                                           js['high'], js['low'], js['volume'])
        print('{symbol}'.format(symbol=symbol))
        close_v = float(close)
        if close_v < 1.0 or close_v > 10000:
            continue
        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")
        rows.append(
            '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.format(
                date_str=date_str, close=close, open=open_, high=high,
                low=low, volume=volume, symbol=symbol))
    return rows
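# A minimal sketch showing how the rows returned above might be persisted;
# the filename is a placeholder and the header mirrors the row layout built
# in _run_requests_return_rows.
def write_rows_csv(rows, filename='daily.csv'):
    with open(filename, 'w') as outfile:
        outfile.write('date,close,open,high,low,volume,symbol\n')
        outfile.writelines(rows)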