def download_histories_csv(date_str):
    """Download intraday bars and write them to one CSV file.

    Builds the request list with ``_get_requests(date_str)``, runs it through
    a ``BaseThrottler`` and writes one row per returned bar to
    ``data/intraday/us.intraday.polygon.history.csv``. The file is opened in
    ``'w'`` mode, so previous content is replaced.

    Responses that are falsy, not HTTP 200, not decodable as JSON, or that
    lack ``results`` or ``ticker`` are reported on stdout and skipped, so a
    single bad response does not abort the export half-way through the file.

    Args:
        date_str: Forwarded unchanged to ``_get_requests``.
    """
    filename = 'data/intraday/us.intraday.polygon.history.csv'
    row_template = (
        '{date_str},{time_str},{close},{open},{high},{low},{volume},{symbol}\n'
    )

    request_list = _get_requests(date_str)
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    with open(filename, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')
        for cnt, res in enumerate(responses):
            if not res:
                print('The response is invalid: %s' % (res))
                continue

            if res.status_code != 200:
                print('response status code is not 200 OK: {code}'.format(
                    code=res.status_code))
                continue

            try:
                js = res.json()
            except ValueError:
                # A body that is not JSON must not abort the whole export.
                print('The response is not valid JSON: %s' % (res))
                continue

            if 'results' not in js:
                print('The response does not have results: %s' % (js))
                continue

            data = js['results']
            if not data:
                continue

            symbol = js.get('ticker')
            if symbol is None:
                print('The response does not have ticker: %s' % (js))
                continue

            print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt,
                                                        symbol=symbol,
                                                        l=len(data)))
            out_lines = []
            for blob in data:
                # blob['t'] is scaled down by 1000 to whole seconds and
                # converted straight into _TZ_US_EAST, without going through
                # a naive host-local datetime first.
                epoch = int(blob['t']) // 1000
                bar_time = datetime.datetime.fromtimestamp(epoch,
                                                           tz=_TZ_US_EAST)
                out_lines.append(
                    row_template.format(
                        date_str=bar_time.strftime('%Y-%m-%d'),
                        time_str=bar_time.strftime('%H:%M:%S'),
                        close=blob['c'],
                        open=blob['o'],
                        high=blob['h'],
                        low=blob['l'],
                        volume=blob['v'],
                        symbol=symbol))
            outfile.writelines(out_lines)
Exemple #2
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build daily CSV rows.

    Every usable response must carry a ``status`` of ``'OK'`` or
    ``'success'`` and a ``tickers`` list. Each entry of that list yields one
    row of the form ``date,close,open,high,low,volume,symbol`` (newline
    terminated), with the prices and volume taken from the entry's ``day``
    object. Responses that fail a check are reported on stdout and skipped.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        list of str: the formatted rows, in response order.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            continue

        try:
            js = res.json()
        except ValueError:
            # A body that is not JSON must not discard the rows collected
            # so far.
            print('The response is not valid JSON: %s' % (res))
            continue

        if js.get('status') not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        if 'tickers' not in js:
            print('The response does not have results: %s' % (js))
            continue

        for ticker in js['tickers']:
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))

            daily = ticker['day']
            # NOTE(review): '1547787608999' looks like a sample epoch value
            # pasted in place of the timestamp field name (possibly
            # 'updated') - confirm against the actual payload. Left as is.
            epoch = int(ticker['1547787608999']) // 1000
            # Convert the instant into _TZ_US_EAST. Localizing a naive
            # host-local datetime would only relabel the host's wall-clock
            # time and can yield the wrong date on a non-Eastern host.
            t = datetime.datetime.fromtimestamp(epoch, tz=_TZ_US_EAST)
            rows.append(
                row_template.format(date_str=t.strftime('%Y-%m-%d'),
                                    close=daily['c'],
                                    open=daily['o'],
                                    high=daily['h'],
                                    low=daily['l'],
                                    volume=daily['v'],
                                    symbol=symbol))

    return rows
Exemple #3
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build CSV rows.

    A usable response carries a ``dataset`` object holding a
    ``dataset_code`` and a ``data`` list. Every entry of ``data`` becomes one
    row of the form ``date,close,open,high,low,volume,symbol`` (newline
    terminated). Responses that fail a check are reported on stdout and
    skipped.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        list of str: the formatted rows, in response order.
    """
    throttler = BaseThrottler(name='base-throttler', delay=0.1)
    throttler.start()
    submitted = throttler.multi_submit(request_list)

    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')
    fetched = [item.response for item in submitted]

    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'
    rows = []
    for response in fetched:
        if not response:
            print('The response is invalid: %s' % (response))
            continue

        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        payload = response.json()
        if not payload:
            print('The response is invalid: %s' % (payload))
            continue

        if 'dataset' not in payload:
            print('The response does not have dataset: %s' % (payload))
            continue

        dataset = payload['dataset']
        if 'data' not in dataset:
            print('The response data does not have data: %s' % (payload))
            continue

        symbol = dataset['dataset_code']
        # Positions read from each entry: 0 date, 1 open, 2 high, 3 low,
        # 4 close, 5 volume.
        rows.extend(
            row_template.format(date_str=entry[0],
                                close=entry[4],
                                open=entry[1],
                                high=entry[2],
                                low=entry[3],
                                volume=entry[5],
                                symbol=symbol)
            for entry in dataset['data'])
    return rows
Exemple #4
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build CSV rows.

    The symbol of each response is taken from the URL of the request at the
    same position in ``request_list`` (the path segment after ``stock/``).
    The JSON payload is iterated as a sequence of bars; each bar needs the
    keys ``date``, ``close``, ``open``, ``high``, ``low`` and ``volume``.

    A bar with zero volume or no close reuses the close of the previous
    emitted bar of the same response for all four prices. A bar that still
    has no close after that is dropped.

    Args:
        request_list: Indexable collection of requests exposing ``url``,
            handed to ``BaseThrottler.multi_submit``.

    Returns:
        list of str: rows of the form
        ``date,close,open,high,low,volume,symbol`` (newline terminated).
    """
    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    submitted = throttler.multi_submit(request_list)

    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')
    fetched = [item.response for item in submitted]

    expected_keys = ['date', 'close', 'open', 'high', 'low', 'volume']
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'
    rows = []
    for cnt, res in enumerate(fetched):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()

        # The payload itself is not consulted for the symbol; it comes from
        # the URL of the matching request.
        match = re.search(r'stock/([^/]+)', request_list[cnt].url)
        if not match:
            continue
        symbol = match.group(1)

        if not js:
            continue

        print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt,
                                                    symbol=symbol,
                                                    l=len(js)))
        last_close = None
        for blob in js:
            # Report only the first missing key, then skip the bar.
            missing = next((k for k in expected_keys if k not in blob), None)
            if missing is not None:
                print(
                    'blob: {blob} does not have all the expected keys, missing key: {key}'
                    .format(blob=str(blob), key=missing))
                continue

            close = blob['close']
            open_ = blob['open']
            high = blob['high']
            low = blob['low']
            volume = blob['volume']

            if close is None or volume == 0 or volume == '0':
                close = open_ = high = low = last_close

            if close is None:
                continue

            rows.append(
                row_template.format(date_str=blob['date'],
                                    close=close,
                                    open=open_,
                                    high=high,
                                    low=low,
                                    volume=volume,
                                    symbol=symbol))
            last_close = close
    return rows
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build one row per response.

    A usable response must carry a ``status`` of ``'OK'`` or ``'success'``
    and the keys ``open``, ``afterHours``, ``high``, ``low``, ``volume``,
    ``from`` and ``symbol``. The ``afterHours`` value is written as the
    close. Responses whose close is below 1.0 or above 10000 are dropped
    without a message; other rejected responses are reported on stdout.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        list of str: rows of the form
        ``date,close,open,high,low,volume,symbol`` (newline terminated).

    Raises:
        ValueError: if ``from`` does not match ``%Y-%m-%dT%H:%M:%SZ``.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    # 'symbol' is required too: it is read below and would otherwise raise
    # KeyError instead of skipping the response.
    required_keys = [
        'open', 'afterHours', 'high', 'low', 'volume', 'from', 'symbol'
    ]
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'
    rows = []
    for res in responses:
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        try:
            js = res.json()
        except ValueError:
            # A body that is not JSON must not discard the rows collected
            # so far.
            print('The response is not valid JSON: %s' % (res))
            continue

        if js.get('status') not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        missing = next((k for k in required_keys if k not in js), None)
        if missing is not None:
            # Report the payload itself; the previous code referenced an
            # undefined name here and raised NameError on this path.
            print(
                'blob: {blob} does not have all the expected keys, missing key: {key}'
                .format(blob=str(js), key=missing))
            continue

        symbol = js['symbol']
        close = js['afterHours']
        print('{symbol}'.format(symbol=symbol))

        try:
            close_v = float(close)
        except (TypeError, ValueError):
            print('The response does not have a numeric close: %s' % (js))
            continue
        if close_v < 1.0 or close_v > 10000:
            continue

        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")

        rows.append(
            row_template.format(date_str=date_str,
                                close=close,
                                open=js['open'],
                                high=js['high'],
                                low=js['low'],
                                volume=js['volume'],
                                symbol=symbol))

    return rows