Example #1
def download_histories_csv(date_str):
    filename = 'data/intraday/us.intraday.polygon.history.csv'

    request_list = _get_requests(date_str)
    # request_list = request_list[:10]
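    # delay=0.04 keeps at least 40 ms between consecutive requests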
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
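    # wait_end has returned, so every ThrottledRequest now carries its requests.Response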
    responses = [tr.response for tr in throttled_requests]

    with open(filename, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')
        for cnt, res in enumerate(responses):

            if not res:
                print('The response is invalid: %s' % (res))
                continue

            if res.status_code != 200:
                continue

            js = res.json()
            if 'results' not in js:
                print('The response does not have results: %s' % (js))
                continue

            data = js['results']
            if not data:
                continue

            symbol = js['ticker']
            print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt,
                                                        symbol=symbol,
                                                        l=len(data)))
            out_lines = []
            for blob in data:
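                # Polygon aggregate bars: 't' is epoch milliseconds; c/o/h/l/v are close/open/high/low/volume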
                epoch = int(blob['t']) // 1000
                t = datetime.datetime.fromtimestamp(epoch).astimezone(
                    _TZ_US_EAST)
                date_str = t.strftime('%Y-%m-%d')
                time_str = t.strftime('%H:%M:%S')
                close, open_, high, low, volume = (
                    blob['c'], blob['o'], blob['h'], blob['l'], blob['v'])
                out_lines.append(
                    '{date_str},{time_str},{close},{open},{high},{low},{volume},{symbol}\n'
                    .format(date_str=date_str,
                            time_str=time_str,
                            close=close,
                            open=open_,
                            high=high,
                            low=low,
                            volume=volume,
                            symbol=symbol))
            outfile.writelines(out_lines)
Example #2
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            continue

        js = res.json()
        if js.get('status') not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        if 'tickers' not in js:
            print('The response does not have tickers: %s' % (js))
            continue

        for i, ticker in enumerate(js['tickers']):
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))

            daily = ticker['day']
            close, open_, high, low, volume = (
                daily['c'], daily['o'], daily['h'], daily['l'], daily['v'])
            # the original read ticker['1547787608999'], a timestamp value used
            # as a key; the snapshot's timestamp field is assumed to be 'updated'
            epoch = int(ticker['updated']) // 1000
            t = datetime.datetime.fromtimestamp(epoch).astimezone(_TZ_US_EAST)
            date_str = t.strftime('%Y-%m-%d')
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))

    return rows
Example #3
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.1)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, response in enumerate(responses):
        if not response:
            print('The response is invalid: %s' % (response))
            continue

        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        js = response.json()
        if not js:
            print('The response is invalid: %s' % (js))
            continue

        if 'dataset' not in js:
            print('The response does not have dataset: %s' % (js))
            continue

        if 'data' not in js['dataset']:
            print('The response data does not have data: %s' % (js))
            continue

        symbol = js['dataset']['dataset_code']
        data = js['dataset']['data']
        for data_for_date in data:
            date_str = data_for_date[0]

            close, open_, high, low, volume = (
                data_for_date[4], data_for_date[1], data_for_date[2],
                data_for_date[3], data_for_date[5])
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))
    return rows
Example #4
def main():
    args = parse_args()
    bt = BaseThrottler(name='base-throttler', delay=args['delay'])
    reqs = []
    for i in range(0, args['n_reqs']):
        r = requests.Request(method='GET', url=args['url'], data='Request - ' + str(i + 1))
        reqs.append(r)

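    # entering the context starts the throttler; leaving it shuts it down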
    with bt:
        throttled_requests = bt.multi_submit(reqs)

    for r in throttled_requests:
        print(r.response)

    print "Success: {s}, Failures: {f}".format(s=bt.successes, f=bt.failures)
Example #5
 def __init__(self, config):
     super().__init__()
     self.session = self.createBaseSession()
     self.config = config
     self._delay = config.get('requestDelay')
     self.session.headers.update({'User-Agent': 'Niantic App'})
     self.session.verify = True
     self.throttle = BaseThrottler(name='mainThrottle', session=self.session, delay=self._delay)
     self.throttle.start()
     self.orig = None
Example #6
def main():
    args = parse_args()
    session = requests.Session()
    session.headers.update({'user-agent': 'test-user-agent'})
    bt = BaseThrottler(name='base-throttler',
                       delay=args['delay'],
                       session=session)
    reqs = []
    for i in range(0, args['n_reqs']):
        r = requests.Request(method='GET',
                             url=args['url'],
                             data='Request - ' + str(i + 1))
        reqs.append(r)

    with bt:
        throttled_requests = bt.multi_submit(reqs)

    for r in throttled_requests:
        print(r.response)

    print("Success: {s}, Failures: {f}".format(s=bt.successes, f=bt.failures))
Example #7
 def __init__(self, local, user_key=None, service_key=None):
     requests_cache.install_cache('fse', expire_after=3600)
     self.bt = BaseThrottler(name='fse-throttler', reqs_over_time=(1, 2))
     self.bt.start()
     self.airports = common.load_airports()
     self.aircraft = common.load_aircrafts()
     self.service_key = service_key
     self.user_key = user_key
     if local:
         self.assignments = common.load_pickled_assignments()
     else:
         self.assignments = self.get_assignments()
Example #8
class FSEconomy(object):
    def __init__(self, local, user_key=None, service_key=None):
        requests_cache.install_cache('fse', expire_after=3600)
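        # reqs_over_time=(1, 2): at most one request per two-second window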
        self.bt = BaseThrottler(name='fse-throttler', reqs_over_time=(1, 2))
        self.bt.start()
        self.airports = common.load_airports()
        self.aircraft = common.load_aircrafts()
        self.service_key = service_key
        self.user_key = user_key
        if local:
            self.assignments = common.load_pickled_assignments()
        else:
            self.assignments = self.get_assignments()

    def get_aggregated_assignments(self, cargo=False):
        if cargo:
            self.assignments = self.assignments[self.assignments.UnitType == 'kg']
        else:
            self.assignments = self.assignments[self.assignments.UnitType == 'passengers']
        grouped = self.assignments.groupby(['FromIcao', 'ToIcao'], as_index=False)
        aggregated = grouped.aggregate(np.sum)
        return aggregated.sort_values('Pay', ascending=False)

    def send_single_request(self, path):
        query_link = self.generate_request(const.LINK + path)
        request = requests.Request(method='GET', url=query_link)
        i = 0
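        # retry up to 10 times on throttling errors, clearing the HTTP cache
        # so each retry actually reaches the server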
        while True:
            try:
                throttled_request = self.bt.submit(request)
                data = throttled_request.response.text
                # 'To many requests' [sic] is matched verbatim as the server sends it
                if 'To many requests' in data or 'minimum delay' in data:
                    raise requests.exceptions.ConnectionError
                return data
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print('Retrying Request')
                i += 1
                time.sleep(60)

    def send_multi_request(self, paths):
        request_queue = []
        for path in paths:
            query_link = self.generate_request(const.LINK + path)
            request_queue.append(requests.Request(method='GET', url=query_link))

        i = 0
        while True:
            try:
                throttled_requests = self.bt.multi_submit(request_queue)
                responses = [tr.response for tr in throttled_requests]
                request_queue = []
                complete_response = []
                for response in responses:
                    if 'To many requests' in response.text or 'minimum delay' in response.text:
                        # re-queue as a Request object (the original appended the bare URL,
                        # which multi_submit cannot take)
                        request_queue.append(requests.Request(method='GET', url=response.url))
                        print(response.text)
                    elif 'you are now in a lockout period' in response.text:
                        raise Exception(response.text)
                    else:
                        complete_response.append(response.text)

                if len(request_queue) > 0:
                    raise requests.exceptions.ConnectionError

                return complete_response
            except AttributeError:
                for request in request_queue:
                    print('Error with request: ', request)
                raise
            except requests.exceptions.ConnectionError:
                requests_cache.clear()
                if i >= 10:
                    raise
                print('Retrying Request')
                i += 1
                time.sleep(60)

    def get_aircrafts_by_icaos(self, icaos):
        aircraft_requests = []
        for icao in icaos:
            aircraft_requests.append('query=icao&search=aircraft&icao={}'.format(icao))

        responses = self.send_multi_request(aircraft_requests)
        all_aircraft = []
        for response in responses:
            # pd.DataFrame.from_csv was removed from pandas; read_csv with
            # index_col=0 and parse_dates=True is the equivalent call
            aircraft = pd.read_csv(StringIO(response), index_col=0, parse_dates=True)
            try:
                aircraft.RentalDry = aircraft.RentalDry.astype(float)
                aircraft.RentalWet = aircraft.RentalWet.astype(float)
                all_aircraft.append(aircraft)
            except Exception:
                print('error updating rental info: ', response)

        return all_aircraft

    def get_assignments(self):
        assignments = pd.DataFrame()

        i = 0
        assignment_requests = []
        number_at_a_time = 1000
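        # fetch assignments in batches of 1000 ICAOs per query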
        while i + number_at_a_time < len(self.airports):
            assignment_requests.append(
                'query=icao&search=jobsfrom&icaos={}'.format('-'.join(self.airports.icao[i:i + number_at_a_time])))
            i += number_at_a_time

        responses = self.send_multi_request(assignment_requests)
        for data in responses:
            assignments = pd.concat(
                [assignments, pd.read_csv(StringIO(data), index_col=0, parse_dates=True)])

        response = self.send_single_request(
            'query=icao&search=jobsfrom&icaos={}'.format(
                # [i:] covers all remaining airports; the original slice
                # stopped at len - 1 and dropped the last one
                '-'.join(self.airports.icao[i:])))
        assignments = pd.concat(
            [assignments, pd.read_csv(StringIO(response), index_col=0, parse_dates=True)])
        with open('assignments', 'wb') as f:
            pickle.dump(assignments, f)
        return assignments

    def get_best_assignments(self, row):
        df = self.assignments[(self.assignments.FromIcao == row['FromIcao']) &
                              (self.assignments.ToIcao == row['ToIcao']) & (self.assignments.Amount <= row['Seats'])]
        if not len(df):
            return None
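        # 0/1 knapsack: pick assignments maximizing total pay within the seat capacity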
        prob = LpProblem("Knapsack problem", LpMaximize)
        w_list = df.Amount.tolist()
        p_list = df.Pay.tolist()
        x_list = [LpVariable('x{}'.format(i), 0, 1, 'Integer') for i in range(1, 1 + len(w_list))]
        prob += sum([x * p for x, p in zip(x_list, p_list)]), 'obj'
        prob += sum([x * w for x, w in zip(x_list, w_list)]) <= row['Seats'], 'c1'
        prob.solve()
        return df.iloc[[i for i in range(len(x_list)) if x_list[i].varValue]]

    def get_best_craft(self, icao, radius):
        print('Searching for the best aircraft from {}'.format(icao))
        max_seats = 0
        best_aircraft = None
        near_icaos = self.get_closest_airports(icao, radius).icao

        all_aircraft = self.get_aircrafts_by_icaos(near_icaos)
        for aircraft in all_aircraft:
            if not len(aircraft):
                continue
            merged = pd.DataFrame.merge(aircraft, self.aircraft, left_on='MakeModel', right_on='Model', how='inner')
            merged = merged[
                (~merged.MakeModel.isin(const.IGNORED_AIRCRAFTS)) & (merged.RentalWet + merged.RentalDry > 0)]
            if not len(merged):
                continue
            aircraft = merged.loc[merged.Seats.idxmax()]  # .ix was removed; .loc is its label-based replacement
            if aircraft.Seats > max_seats:
                best_aircraft = aircraft
                max_seats = aircraft.Seats
        return best_aircraft

    def get_closest_airports(self, icao, nm):
        lat = self.airports[self.airports.icao == icao].lat.iloc[0]
        nm = float(nm)
        # one degree of latitude is appr. 69 nm
        lat_min = lat - nm / 69
        lat_max = lat + nm / 69
        filtered_airports = self.airports[self.airports.lat > lat_min]
        filtered_airports = filtered_airports[filtered_airports.lat < lat_max]
        distance_vector = filtered_airports.icao.map(lambda x: self.get_distance(icao, x))
        return filtered_airports[distance_vector < nm]

    def get_distance(self, from_icao, to_icao):
        try:
            lat1, lon1 = [radians(x) for x in self.airports[self.airports.icao == from_icao][['lat', 'lon']].iloc[0]]
            lat2, lon2 = [radians(x) for x in self.airports[self.airports.icao == to_icao][['lat', 'lon']].iloc[0]]
        except IndexError:
            return 9999.9
        return common.get_distance(lat1, lon1, lat2, lon2)

    def get_logs(self, from_id):
        key = self.user_key or self.service_key
        data = self.send_single_request('query=flightlogs&search=id&readaccesskey={}&fromid={}'.format(key, from_id))
        logs = pd.read_csv(StringIO(data), index_col=0, parse_dates=True)
        logs = logs[(logs.MakeModel != 'Airbus A321') & (logs.MakeModel != 'Boeing 737-800') & (logs.Type == 'flight')]
        logs['Distance'] = logs.apply(lambda x, self=self: self.get_distance(x['From'], x['To']), axis=1)
        logs = pd.merge(logs, self.aircraft, left_on='MakeModel', right_on='Model')
        logs['FlightTimeH'] = logs.apply(lambda x: int(x['FlightTime'].split(':')[0]), axis=1)
        logs['FlightTimeM'] = logs.apply(lambda x: int(x['FlightTime'].split(':')[1]), axis=1)
        logs = logs[(logs.FlightTimeH > 0) | (logs.FlightTimeM > 0)]
        logs = logs[logs.Distance > 0]
        logs['AvSpeed'] = logs.apply(lambda x: 60 * x['Distance'] / (60 * x['FlightTimeH'] + x['FlightTimeM']), axis=1)
        # debugger breakpoint removed; return the prepared frame instead
        return logs

    def generate_request(self, query_link):
        if self.user_key:
            query_link += '&userkey={}'.format(self.user_key)
        elif self.service_key:
            query_link += '&servicekey={}'.format(self.service_key)
        return query_link

    def __del__(self):
        self.bt.shutdown()
Example #9
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()
        req = request_list[cnt]
        m = re.search(r'stock/([^/]+)', req.url)
        if not m:
            continue

        symbol = m.groups()[0]

        if not js:
            continue

        print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt,
                                                    symbol=symbol,
                                                    l=len(js)))
        prev_close = None
        for blob in js:
            keys = ['date', 'close', 'open', 'high', 'low', 'volume']
            is_blob_compromised = False
            for k in keys:
                if k not in blob:
                    print(
                        'blob: {blob} does not have all the expected keys, missing key: {key}'
                        .format(blob=str(blob), key=k))
                    is_blob_compromised = True
                    break
            if is_blob_compromised:
                continue
            date_str = blob['date']
            close, open_, high, low, volume = (
                blob['close'], blob['open'], blob['high'], blob['low'],
                blob['volume'])
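            # zero-volume or missing-close bars fall back to the previous close below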
            if volume == '0' or volume == 0 or close is None:
                close = open_ = high = low = prev_close

            if close is None:
                continue

            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))

            prev_close = close
    return rows
Example #10
 def __init__(self, logs_cache_dir):
     self.logs_cache_dir = logs_cache_dir + '/'
     self.throttler = BaseThrottler(name='base-throttler', delay=0.2)
     self.throttler.start()
Example #11
class LogsClient:
    def __init__(self, logs_cache_dir):
        self.logs_cache_dir = logs_cache_dir + '/'
        self.throttler = BaseThrottler(name='base-throttler', delay=0.2)
        self.throttler.start()

    def fetchLogs(self, log_metadata):
        log_metadata_lookup = {log[u'id']: log for log in log_metadata}

        # fetch cached log data
        if not os.path.isdir(self.logs_cache_dir):
            os.makedirs(self.logs_cache_dir)

        cache_filepaths = {
            id: self.getLogFilepath(id)
            for id in log_metadata_lookup
        }
        existing_logs = {
            id: loadJson(filepath)
            for id, filepath in cache_filepaths.items()
            if os.path.isfile(filepath)
        }

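        # a cached log is stale if its metadata shows a newer upload date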
        updated_log_ids = [
            id for id, log in existing_logs.items()
            if log[u'info'][u'date'] < log_metadata_lookup[id][u'date']
        ]

        # fetch any new uncached logs or logs that need to be updated
        fetched_log_ids = [
            id for id in log_metadata_lookup
            if (id not in existing_logs or id in updated_log_ids)
        ]
        fetched_logs = {}
        if len(fetched_log_ids) > 0:
            reqs = [
                requests.Request('GET', 'http://logs.tf/api/v1/log/' + str(id))
                for id in fetched_log_ids
            ]

            throttled_requests = self.throttler.multi_submit(reqs)

            fetched_logs = {
                getLogIdFromUrl(tr.request.url): tr.response.json()
                for tr in throttled_requests
            }

            # update cache
            for id, log_json in fetched_logs.items():
                saveJson(self.getLogFilepath(id), log_json)

        # return merged cached & new results, preferring the new results if any conflicts
        return {**existing_logs, **fetched_logs}

    def getUploaderLogMetadata(self, uploaderId):
        return self.throttler.submit(
            requests.Request(
                'GET', 'http://logs.tf/api/v1/log?uploader=' + uploaderId +
                '&limit=10000')).response.json()

    def getLogFilepath(self, id):
        return self.logs_cache_dir + str(id) + '.json'

    def close(self):
        self.throttler.shutdown()
Example #12
class ThrottledSession:

    def __init__(self, config):
        super().__init__()
        self.session = self.createBaseSession()
        self.config = config
        self._delay = config.get('requestDelay')
        self.session.headers.update({'User-Agent': 'Niantic App'})
        self.session.verify = True
        self.throttle = BaseThrottler(name='mainThrottle', session=self.session, delay=self._delay)
        self.throttle.start()
        self.orig = None

    def getThrottle(self):
        return self.throttle

    def createBaseSession(self):
        sess = session()
        sess.headers = {
            'User-Agent': 'Niantic App',
        }
        sess.verify = False
        return sess

    def updateDelay(self):
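        # sync the throttler with any runtime change to the configured delay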
        config_delay = self.config.get('requestDelay')
        if self._delay != config_delay:
            self._delay = config_delay
            if self.throttle:
                self.throttle._delay = self._delay

    def post(self, url, **kwargs):
        self.updateDelay()
        wrapper = Request(method='POST', url=url, **kwargs)
        res = self.throttle.submit(wrapper).response
        return res

    def get(self, url, **kwargs):
        self.updateDelay()
        wrapper = Request(method='GET', url=url, **kwargs)
        res = self.throttle.submit(wrapper).response
        return res

    def makeThrottle(self):
        throttle = BaseThrottler(name='mainThrottle', session=self.session, delay=self._delay)
        return throttle

    def restart(self):
        if self.orig:
            self.throttle.shutdown()
            self.throttle = self.orig
            self.throttle.unpause()
            self.orig = None

    def pauseExec(self):
        self.orig = self.throttle
        self.orig.pause()
        self.throttle = self.makeThrottle()
        self.throttle.start()

    def stop(self):
        self.throttle.shutdown()

    def makeNew(self):
        self.throttle.shutdown()
        time.sleep(1)
        self.throttle = self.makeThrottle()
        self.throttle.start()
Example #13
import logging
import os
import re
import requests
from requests_throttler import BaseThrottler

FOOD_PLAN_INDEX = '''http://www.cnpp.usda.gov/USDAFoodPlansCostofFood/reports?field_publication_type_tid=953&field_publication_date_value[value]&page={page_no}'''
ORIGIN_BASE_ADDRESS = '''http://origin.www.cnpp.usda.gov/'''
ORIGIN_TABLE_ADDRESS = '''http://origin.www.cnpp.usda.gov/USDAFoodCost-Home.htm'''

COF_REPORT_NAME_REGEX = re.compile(r'/(CostofFood\w\w\w(\d){2,4}\.pdf)\b')

SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
PDF_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, 'pdfs')

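# module-level throttler: one request every 10 seconds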
THROTTLER = BaseThrottler(name='cof-report-throttler', delay=10.0)

class Report:
    '''A container for report name and link that overrides __hash__ and
    __eq__ for sets to remove reports of the same name'''
    def __init__(self, name, link):
        self.name = name
        self.link = link

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return self.name == other.name

    def __repr__(self):
        # body assumed; the snippet was truncated at this definition
        return 'Report({!r}, {!r})'.format(self.name, self.link)
Example #14
def _run_requests_return_rows(request_list):
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()

        if js.get('status') not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        keys = ['open', 'afterHours', 'high', 'low', 'volume', 'from']
        is_blob_compromised = False
        for k in keys:
            if k not in js:
                print(
                    'response: {js} does not have all the expected keys, missing key: {key}'
                    .format(js=str(js), key=k))
                is_blob_compromised = True
                break
        if is_blob_compromised:
            continue

        symbol = js['symbol']

        close, open_, high, low, volume = (
            js['afterHours'], js['open'], js['high'], js['low'], js['volume'])
        print('{symbol}'.format(symbol=symbol))
        close_v = float(close)
        if close_v < 1.0 or close_v > 10000:
            continue

        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")

        rows.append(
            '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
            format(date_str=date_str,
                   close=close,
                   open=open_,
                   high=high,
                   low=low,
                   volume=volume,
                   symbol=symbol))

    return rows