Example #1
def sync_series():
    logger.info("Download idx data...")
    os.makedirs(IDX_DATA_DIR, exist_ok=True)
    with open(IDX_LIST_FILE_NAME, 'r') as f:
        listing = f.read()
    listing = json.loads(listing)
    for a in listing:
        if RELAY_KEY is None:
            avantage_data = load_series_daily_adjusted(a['id'])
            if avantage_data is None:
                continue
            avantage_data = avantage_data.sel(field='close')
            # serialize to netcdf bytes, then gzip and base64-encode them
            # for the verification request
            d = avantage_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = IDX_DATA_VERIFY_URL + "/" + str(a['id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))
            # keep only the date range approved by the server
            data = avantage_data.loc[approved_range[0]:approved_range[1]]
        else:
            url = IDX_DATA_FULL_URL + "/" + str(a['id']) + "/"
            data = request_with_retry(url)
            if data is None:
                continue
            data = xr.open_dataarray(data)
        file_name = os.path.join(IDX_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
    logger.info("Done.")
Example #2
def sync_db(id):
    logger.info("sync db: " + id)
    db_folder = os.path.join(BLSGOV_DIR, id.lower())
    db_folder_new = db_folder + "_new"
    lockfile = db_folder + '.lock'

    try:
        shutil.rmtree(db_folder_new)
    except FileNotFoundError:
        pass
    os.makedirs(db_folder_new, exist_ok=True)

    file_list = request_with_retry(BLSGOV_MASTER_URL + "files/" + id.lower())
    file_list = json.loads(file_list)
    file_list = [f['name'] for f in file_list if f['type'] == 'file']

    logger.info("files to download: " + str(len(file_list)))

    for fn in file_list:
        data = request_with_retry(BLSGOV_MASTER_URL + "files/" + id.lower() +
                                  "/" + fn)
        path = os.path.join(db_folder_new, fn)
        with open(path, 'wb') as f:
            f.write(data)

    with portalocker.Lock(lockfile, flags=portalocker.LOCK_EX):
        try:
            shutil.rmtree(db_folder)
        except FileNotFoundError:
            pass
        shutil.move(db_folder_new, db_folder)
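
sync_db swaps the database directory into place under an exclusive portalocker lock, so readers never observe a half-written folder. A reader-side counterpart would hold a shared lock on the same lockfile; a minimal sketch, where the helper name with_db is hypothetical:

import os
import portalocker

def with_db(id, fn):
    # Hypothetical reader: holds a shared lock on the lockfile used by
    # sync_db, so the directory cannot be swapped while fn reads it.
    db_folder = os.path.join(BLSGOV_DIR, id.lower())
    lockfile = db_folder + '.lock'
    with portalocker.Lock(lockfile, flags=portalocker.LOCK_SH):
        return fn(db_folder)
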
Example #3
def sync_major():
    if RELAY_KEY is None:
        return
    os.makedirs(IDX_DIR, exist_ok=True)
    logger.info("Download major idx list...")
    listing = request_with_retry(MAJOR_IDX_LIST_URL)

    old_listing = b''

    try:
        with open(MAJOR_IDX_LIST_FILE_NAME, 'rb') as f:
            old_listing = f.read()
    except FileNotFoundError:
        pass

    if old_listing == listing:
        logger.info("nothing changed")
        return

    logger.info("Download major idx data...")
    data = request_with_retry(MAJOR_IDX_DATA_URL)

    with open(MAJOR_IDX_DATA_FILE_NAME, 'wb') as f:
        f.write(data)

    with open(MAJOR_IDX_LIST_FILE_NAME, 'wb') as f:
        f.write(listing)

    logger.info("Done.")
Example #4
def sync_indexes():
    logger.info("Download idx list...")
    os.makedirs(IDX_DIR, exist_ok=True)
    url = IDX_LIST_URL
    listing = request_with_retry(url)
    listing = json.loads(listing)
    listing = [l for l in listing if INDEXES is None or l['id'] in INDEXES]

    old_listing = []

    try:
        with open(IDX_LIST_FILE_NAME, 'rb') as f:
            old_listing = f.read()
            old_listing = json.loads(old_listing)
    except FileNotFoundError:
        pass

    if listing == old_listing:
        logger.info('nothing changed')
        return

    logger.info("Download idx data...")
    os.makedirs(IDX_DATA_DIR, exist_ok=True)
    for a in listing:
        if RELAY_KEY is None:
            if a.get('etf') is None:
                continue
            avantage_data = load_series_daily_adjusted(a['etf'])
            if avantage_data is None:
                continue
            avantage_data = avantage_data.sel(field='close')
            d = avantage_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = IDX_DATA_VERIFY_URL + "/" + str(a['id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))
            data = avantage_data.loc[approved_range[0]:approved_range[1]]
        else:
            url = IDX_DATA_FULL_URL + "/" + str(a['id']) + "/"
            data = request_with_retry(url)
            if data is None:
                continue
            data = xr.open_dataarray(data)
        file_name = os.path.join(IDX_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)

    with open(IDX_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
Example #5
def sync():
    logger.info("Download crypto...")
    os.makedirs(WORK_DIR, exist_ok=True)
    data = request_with_retry(CRYPTOFUTURES_V2_URL, min_size=1024)
    with open(CRYPTOFUTURES_V2_FILE_NAME, 'wb') as f:
        f.write(data)

    data = request_with_retry(CRYPTOFUTURES_V1_URL, min_size=1024)
    with open(CRYPTOFUTURES_V1_FILE_NAME, 'wb') as f:
        f.write(data)

    logger.info('Done.')
Example #6
def sync_data():
    logger.info("Download assets data...")
    os.makedirs(ASSETS_DATA_DIR, exist_ok=True)
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        assets = f.read()
    assets = json.loads(assets)
    shuffle(assets)
    progress = 0
    for a in assets:
        if RELAY_KEY is None:
            main_data = load_series_daily_adjusted(a['avantage_symbol']) # FORWARD ORDER
            if main_data is None:
                continue
            d = main_data.to_netcdf(compute=True)
            d = gzip.compress(d)
            d = base64.b64encode(d)
            url = ASSETS_DATA_VERIFY_URL + "/" + str(a['internal_id']) + "/"
            approved_range = request_with_retry(url, d)
            if approved_range is None:
                logger.info("approved range None")
                continue
            approved_range = json.loads(approved_range)
            logger.info("approved range " + str(approved_range))

            main_data = main_data.loc[:, approved_range[0]:approved_range[1]]

            split_cumprod = main_data.sel(field="split").cumprod()
            is_liquid = split_cumprod.copy(deep=True)
            is_liquid[:] = 0

            for lr in a['liquid_ranges']:
                try:
                    is_liquid.loc[lr[0]:lr[1]] = 1
                except Exception:
                    # ignore liquid ranges outside the approved window
                    pass

            ext_data = xr.concat([split_cumprod, is_liquid], pd.Index(["split_cumprod", "is_liquid"], name="field"))
            data = xr.concat([main_data, ext_data], "field")
        else:
            url = ASSETS_DATA_FULL_URL + "/" + str(a['internal_id']) + "/"
            main_data = request_with_retry(url)
            if main_data is None:
                continue
            data = xr.open_dataarray(main_data)

        file_name = os.path.join(ASSETS_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
        progress += 1
        logger.info("progress: " + str(progress) + "/" + str(len(assets)))
    logger.info("Done.")
Example #7
def sync():
    logger.info("Download crypto...")
    os.makedirs(WORK_DIR, exist_ok=True)
    data = request_with_retry(CRYPTO_URL)
    with open(CRYPTO_FILE_NAME, 'wb') as f:
        f.write(data)
    logger.info('Done.')
Example #8
    def handle(self, **options):
        dt = ''
        try:
            with open(CRYPTO_LAST_DATE_FILE_NAME, 'r') as f:
                dt = f.read()
        except FileNotFoundError:
            pass
        request_with_retry(POST_STATUS_URL + "/crypto/" + dt + "/")

        dt = ''
        try:
            with open(STOCKS_LAST_DATE_FILE_NAME, 'r') as f:
                dt = f.read()
        except FileNotFoundError:
            pass
        request_with_retry(POST_STATUS_URL + "/stocks/" + dt + "/")
Example #9
def sync_list():
    logger.info("Download futures list...")
    os.makedirs(FUTURES_DIR, exist_ok=True)

    url = FUTURES_V2_LIST_URL
    lst = request_with_retry(url, min_size=1024)
    lst = json.loads(lst)
    with open(FUTURES_V2_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(lst, indent=2))

    url = FUTURES_V1_LIST_URL
    lst = request_with_retry(url, min_size=1024)
    lst = json.loads(lst)
    with open(FUTURES_V1_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(lst, indent=2))

    logger.info('Done.')
Example #10
    def handle(self, **options):
        dt = None
        try:
            with open(CRYPTO_LAST_DATE_FILE_NAME, 'r') as f:
                dt = f.read()
        except FileNotFoundError:
            pass

        server_dt = request_with_retry(CRYPTO_LAST_DATE_URL).decode()

        if dt is None or server_dt > dt:

            crypto.sync.sync()

            request_with_retry(POST_STATUS_URL + "/crypto/" + server_dt + "/")

            with open(CRYPTO_LAST_DATE_FILE_NAME, 'w') as f:
                f.write(server_dt)
Example #11
    def handle(self, **options):
        dt = None
        try:
            with open(FUTURES_LAST_DATE_FILE_NAME, 'r') as f:
                dt = f.read()
        except FileNotFoundError:
            pass

        server_dt = request_with_retry(FUTURES_LAST_DATE_URL).decode()

        if dt is None or server_dt > dt or not os.path.exists(FUTURES_V2_DATA2_FILE_NAME):
            futures.sync.sync_list()
            futures.sync.sync_data()

            request_with_retry(POST_STATUS_URL + "/futures/" + server_dt + "/")

            with open(FUTURES_LAST_DATE_FILE_NAME, 'w') as f:
                f.write(server_dt)
Example #12
def sync_imf_file(url, file):
    old_data = None
    if os.path.exists(file):
        with open(file, 'rb') as f:
            old_data = f.read()
    new_data = request_with_retry(url)
    if new_data == old_data:
        return
    print("update", file)
    with open(file, 'wb') as f:
        f.write(new_data)
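
A usage sketch for sync_imf_file; the constant IMF_CURRENCY_URL and the target file name are hypothetical, for illustration only:

sync_imf_file(IMF_CURRENCY_URL,  # hypothetical source URL constant
              os.path.join(WORK_DIR, 'imf_currency.json'))  # hypothetical target
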
Example #13
def sync_list():
    logger.info("Download idx list...")
    os.makedirs(IDX_DIR, exist_ok=True)
    url = IDX_LIST_URL
    listing = request_with_retry(url)
    listing = json.loads(listing)
    listing = [l for l in listing if INDEXES is None or l['id'] in INDEXES]
    with open(IDX_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
Example #14
def sync_data():
    logger.info("Download futures data...")
    os.makedirs(FUTURES_DIR, exist_ok=True)
    url = FUTURES_V2_DATA_URL
    data = request_with_retry(url, min_size=1024)
    with open(FUTURES_V2_DATA_FILE_NAME, 'wb') as f:
        f.write(data)

    data = request_with_retry(url + "?offset=1", min_size=1024)
    with open(FUTURES_V2_DATA1_FILE_NAME, 'wb') as f:
        f.write(data)

    data = request_with_retry(url + "?offset=2", min_size=1024)
    with open(FUTURES_V2_DATA2_FILE_NAME, 'wb') as f:
        f.write(data)

    url = FUTURES_V1_DATA_URL
    data = request_with_retry(url, min_size=1024)
    with open(FUTURES_V1_DATA_FILE_NAME, 'wb') as f:
        f.write(data)

    data = request_with_retry(url + "?offset=1", min_size=1024)
    with open(FUTURES_V1_DATA1_FILE_NAME, 'wb') as f:
        f.write(data)

    data = request_with_retry(url + "?offset=2", min_size=1024)
    with open(FUTURES_V1_DATA2_FILE_NAME, 'wb') as f:
        f.write(data)

    logger.info('Done.')
Example #15
    def handle(self, **options):
        dt = None
        try:
            with open(STOCKS_LAST_DATE_FILE_NAME, 'r') as f:
                dt = f.read()
        except FileNotFoundError:
            pass

        server_dt = request_with_retry(STOCKS_LAST_DATE_URL).decode()

        if dt is None or server_dt > dt:

            assets.sync.sync_list()
            assets.sync.sync_data()
            secgov.sync.sync()
            idx.sync.sync_list()
            idx.sync.sync_series()

            request_with_retry(POST_STATUS_URL + "/stocks/" + server_dt + "/")

            with open(STOCKS_LAST_DATE_FILE_NAME, 'w') as f:
                f.write(server_dt)
Example #16
def sync():
    logger.info("Sync sec.gov forms...")

    os.makedirs(SECGOV_FORMS_DIR_NAME, exist_ok=True)

    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        tickers = f.read()
    tickers = json.loads(tickers)
    tickers = [
        t for t in tickers
        if os.path.exists(os.path.join(ASSETS_DATA_DIR, t['id'] + '.nc'))
    ]
    ciks = [t['cik'] for t in tickers]
    ciks = json.dumps(ciks).encode()

    last_id = 0
    try:
        with open(SEC_GOV_LAST_ID_FILE_NAME, 'r') as f:
            last_id = f.readline()
            last_id = last_id.strip()
            last_id = int(last_id)
    except Exception:
        logger.exception("can't read " + SEC_GOV_LAST_ID_FILE_NAME)

    if not SECGOV_INCREMENTAL_UPDATE:
        last_id = 0

    while True:
        # page through forms newer than the last synced id
        url = BASE_URL + "?min_id=" + str(last_id + 1)
        raw = request_with_retry(url, ciks)
        if raw is None:
            break
        lst = json.loads(raw)
        if len(lst) == 0:
            break
        for r in lst:
            d = r['date'].split("-")
            path = os.path.join(SECGOV_FORMS_DIR_NAME,
                                r['type'].replace('/', '-'), *d, r['cik'])
            os.makedirs(path, exist_ok=True)
            fn = os.path.join(path, str(r['id']) + '.json')
            with open(fn, 'w') as f:
                f.write(json.dumps(r, indent=2))
            last_id = max(last_id, r['id'])
        with open(SEC_GOV_LAST_ID_FILE_NAME, 'w') as f:
            f.write(str(last_id))
    logger.info("Done.")
Example #17
def sync_list():
    logger.info("Download asset list...")
    os.makedirs(ASSETS_DIR, exist_ok=True)
    listing = []
    symbols_json = json.dumps(SYMBOLS).encode()
    min_id = 0
    while True:
        url = ASSETS_LIST_URL + "?min_id=" + str(min_id + 1)
        page = request_with_retry(url, symbols_json)
        if page is None:
            break
        page = json.loads(page)
        if len(page) == 0:
            break
        for a in page:
            min_id = max(min_id, a['internal_id'])
            listing.append(a)
    with open(ASSETS_LIST_FILE_NAME, 'w') as f:
        f.write(json.dumps(listing, indent=2))
    logger.info('Done.')
Example #18
def sync_data():
    logger.info("Download assets data...")
    os.makedirs(ASSETS_DATA_DIR, exist_ok=True)
    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        assets = f.read()
    assets = json.loads(assets)
    shuffle(assets)
    progress = 0
    for a in assets:
        url = ASSETS_DATA_FULL_URL + "/" + str(a['internal_id']) + "/"
        main_data = request_with_retry(url)
        if main_data is None:
            continue
        data = xr.open_dataarray(main_data)

        file_name = os.path.join(ASSETS_DATA_DIR, a['id'] + '.nc')
        data.to_netcdf(path=file_name, compute=True)
        progress += 1
        logger.info("progress: " + str(progress) + "/" + str(len(assets)))
    logger.info("Done.")
Example #19
def sync_dbs():
    go = True
    while go:  # repeat until a full pass downloads nothing new
        go = False

        logger.info("Download blsgov db list...")
        os.makedirs(BLSGOV_DIR, exist_ok=True)

        old_listing = []

        try:
            with gzip.open(BLSGOV_DB_LIST_FILE_NAME, 'rt') as f:
                raw = f.read()
                old_listing = json.loads(raw)
        except Exception:
            logger.exception("can't read " + BLSGOV_DB_LIST_FILE_NAME)

        raw = request_with_retry(BLSGOV_MASTER_URL + 'db/')
        new_listing = json.loads(raw)
        if BLSGOV_DBS is not None:
            new_listing = [l for l in new_listing if l['id'] in BLSGOV_DBS]

        if all(l in old_listing for l in new_listing):
            logger.info("nothing is new")
            return

        for l in new_listing:
            if l in old_listing:
                continue
            sync_db(l['id'])
            go = True

        lockfile = BLSGOV_DB_LIST_FILE_NAME + '.lock'
        with portalocker.Lock(lockfile, flags=portalocker.LOCK_EX):
            with gzip.open(BLSGOV_DB_LIST_FILE_NAME, 'wt') as f:
                f.write(json.dumps(new_listing, indent=1))
Example #20
def sync():
    logger.info("Sync sec.gov forms...")

    with open(ASSETS_LIST_FILE_NAME, 'r') as f:
        tickers = f.read()
    tickers = json.loads(tickers)
    # tickers = [t for t in tickers if os.path.exists(os.path.join(ASSETS_DATA_DIR, t['id'] + '.nc'))]
    ciks = [t['cik'] for t in tickers]
    ciks = [c for c in ciks if c is not None]
    os.makedirs(SECGOV_BASE_DIR, exist_ok=True)

    idx = request_with_retry(BASE_URL + SEC_GOV_IDX_FILE_NAME)
    idx = gzip.decompress(idx)
    idx = json.loads(idx)
    idx = [i for i in idx if i['cik'] in ciks]

    old_idx = []

    try:
        with gzip.open(os.path.join(SECGOV_BASE_DIR, SEC_GOV_IDX_FILE_NAME),
                       'rt') as f:
            txt = f.read()
            old_idx = json.loads(txt)
    except Exception:
        logger.exception("can't load " + SEC_GOV_IDX_FILE_NAME)

    if idx == old_idx:
        logger.info("nothing changed")
        return

    for i in idx:
        logger.info("sync: " + json.dumps(i))
        oi = next((j for j in old_idx if j['cik'] == i['cik']), None)
        if oi == i:
            print("not changed")
            continue

        os.makedirs(os.path.join(SECGOV_BASE_DIR, i['cik']), exist_ok=True)

        raw = request_with_retry(BASE_URL + i['cik'] + "/" +
                                 SEC_GOV_IDX_FILE_NAME)
        with open(
                os.path.join(SECGOV_BASE_DIR, i['cik'], SEC_GOV_IDX_FILE_NAME),
                'wb') as f:
            f.write(raw)

        raw = request_with_retry(BASE_URL + i['cik'] + "/" +
                                 SEC_GOV_CONTENT_FILE_NAME)
        with open(
                os.path.join(SECGOV_BASE_DIR, i['cik'],
                             SEC_GOV_CONTENT_FILE_NAME), 'wb') as f:
            f.write(raw)

        facts = dict()
        with zipfile.ZipFile(
                os.path.join(SECGOV_BASE_DIR, i['cik'],
                             SEC_GOV_CONTENT_FILE_NAME), 'r') as z:
            for n in z.namelist():
                r = z.read(n).decode()
                r = json.loads(r)
                for f in r['facts']:
                    _f = {
                        'cik': r['cik'],
                        'report_id': r['id'],
                        'report_date': r['date'],
                        'report_type': r['type'],
                        'report_url': r['url'],
                        'fact_name': f['name'],
                        'segment': f.get('segment'),  # TODO
                        'value': f['value'],
                        'period': f['period']['value'] if f.get('period') is not None else None,  # TODO
                        'period_type': f['period']['type'] if f.get('period') is not None else None,
                        'period_length': get_period_length(f),
                        'unit': f['unit']['value'] if f.get('unit') is not None else None,  # TODO
                        'unit_type': f['unit']['type'] if f.get('unit') is not None else None,
                    }
                    facts.setdefault(f['name'], []).append(_f)

        with zipfile.ZipFile(os.path.join(SECGOV_BASE_DIR, i['cik'],
                                          SEC_GOV_FACTS_FILE_NAME),
                             'w',
                             compression=zipfile.ZIP_DEFLATED,
                             compresslevel=9) as z:
            for k, p in facts.items():
                p.sort(key=lambda x: (x['report_date'], x['report_id']))
                z.writestr(k + '.json', json.dumps(p, indent=1))

    with gzip.open(os.path.join(SECGOV_BASE_DIR, SEC_GOV_IDX_FILE_NAME),
                   'wt') as f:
        f.write(json.dumps(idx, indent=1))

    logger.info("Done.")