Example No. 1
def load_data_calc_output_and_check_forward_looking(strategy):
    """
    :param strategy: function with data loading and output calculation
    :return: whole output
    """
    qdc.MAX_DATE_LIMIT = None
    qdc.MAX_DATETIME_LIMIT = None

    log_info("Computing of the whole output...")
    whole_output = strategy()

    last_date = datetime.datetime.now().date()
    last_date = last_date - datetime.timedelta(
        days=FORWARD_LOOKING_TEST_OFFSET)
    qdc.MAX_DATE_LIMIT = last_date
    qdc.MAX_DATETIME_LIMIT = datetime.datetime.combine(last_date,
                                                       datetime.time.min)

    log_info("Computing of the cropped output...")
    cropped_output = strategy()

    qdc.MAX_DATE_LIMIT = None
    qdc.MAX_DATETIME_LIMIT = None

    check_forward_looking(cropped_output, whole_output)

    return whole_output
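
A minimal usage sketch (not from the library; the strategy body is illustrative and assumes qnt.data.load_data and the ds.* dimension constants are available):

import qnt.data as qndata

def my_strategy():
    # toy example: weight 1 for every liquid asset (the check only compares outputs)
    data = qndata.load_data(tail=4 * 365,
                            dims=(qndata.ds.TIME, qndata.ds.FIELD, qndata.ds.ASSET))
    return data.sel(field="is_liquid")

output = load_data_calc_output_and_check_forward_looking(my_strategy)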
Example No. 2
def check_forward_looking(cropped_output, whole_output):
    cropped_output = sort_and_crop_output(cropped_output)
    whole_output = sort_and_crop_output(whole_output)

    max_time = min(cropped_output.coords[ds.TIME].values.max(),
                   whole_output.coords[ds.TIME].values.max())

    cropped_output = cropped_output.loc[:max_time]
    whole_output = whole_output.loc[:max_time]

    cropped_output, whole_output = xr.align(cropped_output,
                                            whole_output,
                                            join='outer')

    cropped_output = cropped_output.fillna(0)
    whole_output = whole_output.fillna(0)

    diff = whole_output - cropped_output
    # print(diff.where(diff!=0).dropna('time', 'all').dropna('asset','all'))
    delta = abs(diff).max().values
    if delta > FORWARD_LOOKING_TEST_DELTA:
        log_info('WARNING: This strategy uses forward looking! Delta = ' +
                 str(delta))
        return True
    else:
        log_info('Ok. There is no forward looking.')
        return False
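
A self-contained toy illustration of the comparison above (coordinate names 'time' and 'asset' mirror ds.TIME and ds.ASSET):

import pandas as pd
import xarray as xr

times = pd.date_range("2021-01-01", periods=3)
whole = xr.DataArray([[1.0, 0.0], [0.5, 0.5], [0.2, 0.8]],
                     dims=["time", "asset"],
                     coords={"time": times, "asset": ["A", "B"]})
cropped = whole.isel(time=slice(0, 2))        # as if the last day had been hidden
max_time = min(cropped.time.values.max(), whole.time.values.max())
a, b = xr.align(cropped.loc[:max_time], whole.loc[:max_time], join="outer")
delta = abs(b.fillna(0) - a.fillna(0)).max().values
print(delta)                                  # 0.0 -> no forward looking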
Example No. 3
def write(state):
    if state is None:
        return
    path = get_env("OUT_STATE_PATH", "state.out.pickle.gz")
    with gzip.open(path, 'wb') as gz:
        pickle.dump(state, gz)
        log_info("State saved.")
Example No. 4
def load_data(
    assets: tp.List[tp.Union[dict, str]] = None,
    min_date: tp.Union[str, datetime.date, None] = None,
    max_date: tp.Union[str, datetime.date, None] = None,
    dims: tp.Tuple[str, str, str] = (ds.FIELD, ds.TIME, ds.ASSET),
    forward_order: bool = True,
    tail: tp.Union[datetime.timedelta, float,
                   int] = DEFAULT_TAIL) -> xr.DataArray:
    """
    :param assets: list of ticker names to load
    :param min_date: first date in data
    :param max_date: last date of data
    :param dims: tuple with ds.FIELD, ds.TIME, ds.ASSET in the specified order
    :param forward_order: boolean, set true if you need the forward order of dates, otherwise the order is backward
    :param tail: datetime.timedelta, tail size of data. min_date = max_date - tail
    :return: xarray DataArray with historical data for selected assets
    """
    t = time.time()
    data = load_origin_data(assets=assets,
                            min_date=min_date,
                            max_date=max_date,
                            tail=tail)
    log_info("Data loaded " + str(round(time.time() - t)) + "s")
    data = adjust_by_splits(data, False)
    data = data.transpose(*dims)
    if forward_order:
        data = data.sel(**{ds.TIME: slice(None, None, -1)})
    data.name = "stocks"
    return data
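
A hedged usage sketch (the tickers are illustrative; ds.TIME/ds.FIELD/ds.ASSET are the dimension constants used above):

data = load_data(assets=["NASDAQ:AAPL", "NASDAQ:MSFT"],
                 tail=4 * 365,
                 dims=(ds.TIME, ds.FIELD, ds.ASSET),
                 forward_order=True)
close = data.sel(field="close")   # (time, asset) table of close prices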
Example No. 5
def calc_output_and_check_forward_looking(data, strategy):
    """
    :param data: loaded data xarray
    :param strategy: function that calculates the output using the provided data
    :return: output
    """
    cropped_data = data

    last_date = data.coords[ds.TIME].values.max()
    last_date = str(last_date)[0:10]
    last_date = datetime.datetime.strptime(last_date, '%Y-%m-%d').date()
    last_date = last_date - datetime.timedelta(
        days=FORWARD_LOOKING_TEST_OFFSET)
    last_date = str(last_date)

    if data.coords[ds.TIME][0] < data.coords[ds.TIME][-1]:
        cropped_data = cropped_data.loc[{ds.TIME: slice(None, last_date)}]
    else:
        cropped_data = cropped_data.loc[{ds.TIME: slice(last_date, None)}]

    cropped_data = cropped_data.dropna(ds.ASSET, 'all')
    cropped_data = cropped_data.dropna(ds.TIME, 'all')

    log_info("Computing of the cropped output...")
    cropped_output = strategy(cropped_data)
    log_info("Computing of the whole output...")
    whole_output = strategy(data)

    check_forward_looking(cropped_output, whole_output)

    return whole_output
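
A usage sketch (the strategy body is illustrative and assumes the loaded data contains an 'is_liquid' field, as the stocks data does):

def my_strategy(data):
    # toy example: weight 1 for every liquid asset in the provided window
    return data.sel(field="is_liquid").fillna(0)

output = calc_output_and_check_forward_looking(data, my_strategy)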
Example No. 6
def show_table(row_offset, column_offset):
    try:
        from IPython.display import display
        display(output.iloc[row_offset:row_offset + tail_r,
                            column_offset:column_offset + tail_c])
    except:
        log_info(output.iloc[row_offset:row_offset + tail_r,
                             column_offset:column_offset + tail_c])
Example No. 7
def calc_correlation(relative_returns, suppress_exception=True):
    try:
        if "SUBMISSION_ID" in os.environ and os.environ["SUBMISSION_ID"] != "":
            log_info("correlation check disabled")
            return []

        ENGINE_CORRELATION_URL = get_env(
            "ENGINE_CORRELATION_URL",
            "https://quantiacs.io/referee/submission/forCorrelation")
        STATAN_CORRELATION_URL = get_env(
            "STATAN_CORRELATION_URL",
            "https://quantiacs.io/statan/correlation")
        PARTICIPANT_ID = get_env("PARTICIPANT_ID", "0")

        with request.urlopen(ENGINE_CORRELATION_URL + "?participantId=" +
                             PARTICIPANT_ID) as response:
            submissions = response.read()
            submissions = json.loads(submissions)
            submission_ids = [s['id'] for s in submissions]

        rr = relative_returns.to_netcdf(compute=True)
        rr = gzip.compress(rr)
        rr = base64.b64encode(rr)
        rr = rr.decode()

        cofactors = []

        chunks = [
            submission_ids[x:x + 50] for x in range(0, len(submission_ids), 50)
        ]

        for c in chunks:
            r = {"relative_returns": rr, "submission_ids": c}
            r = json.dumps(r)
            r = r.encode()
            with request.urlopen(STATAN_CORRELATION_URL, r) as response:
                cs = response.read()
                cs = json.loads(cs)
                cofactors = cofactors + cs

        result = []
        for c in cofactors:
            sub = next(
                (s for s in submissions if str(c['id']) == str(s['id'])))
            sub['cofactor'] = c['cofactor']
            sub['sharpe_ratio'] = c['sharpe_ratio']
            result.append(sub)
        return result
    except Exception as e:
        log_err("WARNING! Can't calculate correlation.")
        if suppress_exception:
            import logging
            logging.exception("network error")
            return []
        else:
            raise e
Example No. 8
def load_output(fn, date):
    output = xr.open_dataarray(fn, cache=False)
    output = output.compute()
    if 'time' not in output.coords:
        log_info('append dimension')
        output = xr.concat([output], pd.DatetimeIndex([date], name='time'))
    output.coords['asset'] = [
        idt.translate_server_id_to_user_id(id) for id in output.asset.values
    ]
    return output
Example No. 9
def read(path=None):
    if path is None:
        path = get_env("IN_STATE_PATH", "state.in.pickle.gz")
    try:
        with gzip.open(path, 'rb') as gz:
            res = pickle.load(gz)
            log_info("State loaded.")
            return res
    except Exception as e:
        log_err("Can't load state.", e)
        return None
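
A round-trip sketch combining this read() with the state write() from Example No. 3 (paths are the defaults assumed via get_env):

state = {"last_rebalance": "2021-01-04", "counter": 42}
write(state)                                    # gzip-pickled to OUT_STATE_PATH
restored = read(get_env("OUT_STATE_PATH", "state.out.pickle.gz"))
assert restored == state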
Example No. 10
def check_output(output, data_type='stocks'):
    if data_type not in ('stocks', 'stocks_long', 'futures',
                         'crypto', 'crypto_futures', 'cryptofutures'):
        log_err("Unsupported data_type", data_type)
        return

    in_sample_points = qnt.stats.get_default_is_period_for_type(data_type)

    min_date = qnt.stats.get_default_is_start_date_for_type(data_type)
    output_tail = output.where(output.time > np.datetime64(min_date)).dropna(
        'time', 'all')
    if len(output_tail) < in_sample_points:
        log_err("ERROR! In sample period does not contain enough points. " +
                str(len(output_tail)) + " < " + str(in_sample_points))
    else:
        log_info("Ok. In sample period contains enough points." +
                 str(len(output_tail)) + " >= " + str(in_sample_points))

    log_info()

    log_info("Load data...")

    data = qnt.data.load_data_by_type(
        data_type,
        assets=output.asset.values.tolist(),
        min_date=(pd.Timestamp(min_date) -
                  pd.Timedelta(days=60)).to_pydatetime())

    log_info()

    qnt.output.check(output, data)
Example No. 11
def run_iterations(time_series, data, window, start_date, lookback_period,
                   strategy, step, collect_all_states):
    def copy_window(data, dt, tail):
        return copy.deepcopy(window(data, dt, tail))

    log_info("Run iterations...\n")

    ts = np.sort(time_series)
    outputs = []
    all_states = []

    output_time_coord = ts[ts >= start_date]
    output_time_coord = output_time_coord[::step]

    i = 0

    sys.stdout.flush()

    with progressbar.ProgressBar(max_value=len(output_time_coord),
                                 poll_interval=1) as p:
        state = None
        for t in output_time_coord:
            tail = copy_window(data, t, lookback_period)
            result = strategy(tail, copy.deepcopy(state))
            output, state = unpack_result(result)
            if type(output) != xr.DataArray:
                log_err("Output is not xarray!")
                return
            if set(output.dims) != {'asset'} and set(
                    output.dims) != {'asset', 'time'}:
                log_err("Wrong output dimensions. ", output.dims,
                        "Should contain only:", {'asset', 'time'})
                return
            if 'time' in output.dims:
                output = output.sel(time=t)
            output = output.drop(['field', 'time'], errors='ignore')
            outputs.append(output)
            if collect_all_states:
                all_states.append(state)
            i += 1
            p.update(i)

    sys.stderr.flush()

    log_info("Merge outputs...")
    output = xr.concat(outputs, pd.Index(output_time_coord,
                                         name=qndata.ds.TIME))

    return output, all_states if collect_all_states else state
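
A sketch of a strategy callable that run_iterations can drive (illustrative; it assumes an 'is_liquid' field in the window and that unpack_result splits the returned tuple into output and state):

def my_strategy(tail, state):
    # weights for the last day of the lookback window, one value per asset
    weights = tail.sel(field="is_liquid").isel(time=-1)
    new_state = {"calls": (state or {}).get("calls", 0) + 1}
    return weights, new_state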
Example No. 12
def calc_sharpe_ratio_for_check(data, output, kind=None, check_dates=True):
    """
    Calculates the Sharpe ratio for the check according to the rules.
    :param data:
    :param output:
    :param kind: competition type
    :param check_dates: whether to check the Sharpe ratio dates
    :return:
    """
    import qnt.stats as qns

    if kind is None:
        kind = data.name

    start_date = qns.get_default_is_start_date_for_type(kind)
    sdd = pd.Timestamp(start_date)
    osd = pd.Timestamp(
        output.where(abs(output).sum('asset') > 0).dropna(
            'time', 'all').time.min().values)
    dsd = pd.Timestamp(data.time.min().values)
    if check_dates:
        if (dsd - sdd).days > 10:
            log_err("WARNING! There are not enough points in the data")
            log_err("The first point(" + str(dsd.date()) +
                    ") should be earlier than " + str(sdd.date()))
            log_err("Load data more historical data.")
        else:
            if len(data.sel(time=slice(None, sdd)).time) < 15:
                log_err(
                    "WARNING! There are not enough points in the data for the slippage calculation."
                )
                log_err(
                    "Add 15 extra data points to the data head (load data more historical data)."
                )
        if (osd - sdd).days > 7:
            log_err("WARNING! There are not enough points in the output.")
            log_err("The output series should start from " + str(sdd.date()) +
                    " or earlier instead of " + str(osd.date()))
    sd = max(sdd, dsd)
    sd = sd.to_pydatetime()
    fd = pd.Timestamp(data.time.max().values).to_pydatetime()
    log_info("Period: " + str(sd.date()) + " - " + str(fd.date()))
    output_slice = align(output, data.time, sd, fd)
    rr = qns.calc_relative_return(data, output_slice)
    sr = qns.calc_sharpe_ratio_annualized(rr)
    sr = sr.isel(time=-1).values
    return sr
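
For reference, the annualized Sharpe ratio used here boils down to the following (a simplified sketch assuming roughly 252 trading points per year; the library version works on xarray series):

import numpy as np

def sharpe_ratio_annualized_sketch(relative_returns, points_per_year=252):
    # mean/volatility of the per-period relative returns, scaled to one year
    mean = np.nanmean(relative_returns)
    std = np.nanstd(relative_returns)
    return mean / std * np.sqrt(points_per_year)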
Example No. 13
def write(output):
    """
    Writes the output to the submission file.
    :param output: xarray with daily weights
    """
    import qnt.data.id_translation as idt
    from qnt.data.common import ds, get_env, track_event
    output = output.copy()
    output.coords[ds.ASSET] = [idt.translate_user_id_to_server_id(id) for id in output.coords[ds.ASSET].values]
    output = normalize(output)
    data = output.to_netcdf(compute=True)
    data = gzip.compress(data)
    path = get_env("OUTPUT_PATH", "fractions.nc.gz")
    log_info("Write output: " + path)
    with open(path, 'wb') as out:
        out.write(data)
    track_event("OUTPUT_WRITE")
Example No. 14
def run_init():
    if os.path.exists("init.ipynb"):
        log_info("Run init.ipynb..")
        cmd = " jupyter nbconvert --to html --ExecutePreprocessor.timeout=1800 --execute init.ipynb --stdout "  + \
              "| html2text -utf8"
        # "\\\n 2>&1"
        log_info("cmd:", cmd)
        log_info("output:")
        proc = subprocess.Popen(cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                executable='bash')
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            sys.stdout.write(line)
        proc.wait()
        code = proc.returncode
        log_info("return code:", code)
Example No. 15
def show_asset_stat(asset):
    if asset in stat_per_asset.asset.values.tolist():
        out = output.sel(asset=[asset])
        stat = stat_per_asset.sel(asset=asset)
    else:
        out = output
        stat = stat_global
    log_info("Output:")
    display_scrollable_output_table(out)
    log_info("Stats:")
    display_scrollable_stats_table(stat)
    make_major_plots(stat)
    log_info("---")
Example No. 16
def assemble_output(add_mode='all'):
    log_info("Merge outputs...")
    files = os.listdir(result_dir)
    files = [f for f in files if f.endswith(".fractions.nc.gz")]
    files.sort()
    output = None

    if len(files) == 0:
        log_err("ERROR! There are no outputs.")

    for f in files:
        date = f.split(".")[0]
        date = datetime.date.fromisoformat(date)
        fn = result_dir + "/" + f
        _output = load_output(fn, date)
        _output = _output.where(_output.time <= np.datetime64(date)).dropna(
            'time', 'all')
        if len(_output) == 0:
            continue
        if output is None:
            log_info("init output:", fn,
                     str(_output.time.min().values)[:10],
                     str(_output.time.max().values)[:10])
            output = _output
        else:
            if add_mode == 'all':
                _output = _output.where(
                    _output.time > output.time.max()).dropna('time', 'all')
            elif add_mode == 'one':
                _output = _output.where(
                    _output.time == np.datetime64(date)).dropna('time', 'all')
            else:
                raise Exception("wrong add_mode")
            if len(_output) == 0:
                continue
            log_info("add output:", fn,
                     str(_output.time.min().values)[:10],
                     str(_output.time.max().values)[:10])
            output = xr.concat([output, _output], dim="time")
    return output
Example No. 17
def load_facts(
        ciks: tp.List[str],
        facts: tp.List[str],
        types: tp.Union[None, tp.List[str]] = None,
        skip_segment: bool = False,
        period: tp.Union[str, None] = None,  # 'A', 'S', 'Q'
        columns: tp.Union[tp.List[str], None] = None,
        min_date: tp.Union[str, datetime.date, None] = None,
        max_date: tp.Union[str, datetime.date, None] = None,
        tail: tp.Union[datetime.timedelta, float, int] = DEFAULT_TAIL,
        group_by_cik: bool = False) -> tp.Generator[dict, None, None]:
    """
    Load SEC Forms (Fundamental data)
    :param ciks: list of cik (you can get cik from asset id)
    :param types: list of form types: ['10-K', '10-Q', '10-K/A', '10-Q/A']
    :param facts: list of facts for extraction, for example: ['us-gaap:Goodwill']
    :param skip_segment: skip facts with segment
    :param period: fact periods ('Q', 'A' or 'S')
    :param columns: list of columns to load: ['fact_name','unit_type','unit','segment','period_type','period','period_length','report_type','report_url','report_date']
    :param min_date: min form date
    :param max_date: max form date
    :param tail: datetime.timedelta, tail size of data. min_date = max_date - tail
    :return: generator
    """
    track_event("DATA_SECGOV_FACTS")
    max_date = parse_date(max_date)

    if min_date is not None:
        min_date = parse_date(min_date)
    else:
        min_date = max_date - parse_tail(tail)

    params = {
        'ciks': list(set(ciks)),
        'types': list(set(types)) if types is not None else None,
        'facts': list(set(facts)),
        'skip_segment': skip_segment,
        'columns': list(set(columns)) if columns is not None else None,
        'period': period,
        'min_date': min_date.isoformat(),
        'max_date': max_date.isoformat()
    }

    max_batch_size = min(50, SECGOV_BATCH_SIZE // len(facts))
    log_info("load secgov facts...")
    t = time.time()
    for offset in range(0, len(ciks), max_batch_size):
        batch_ciks = []
        if offset + max_batch_size > len(ciks):
            batch_ciks = ciks[offset:]
        else:
            batch_ciks = ciks[offset:(offset + max_batch_size)]
        params['ciks'] = batch_ciks
        params_js = json.dumps(params)
        raw = request_with_retry("sec.gov/facts", params_js.encode())
        js = raw.decode()
        facts = json.loads(js)
        if group_by_cik:
            facts = sorted(facts, key=lambda k: k['cik'])
            groups = itertools.groupby(facts, key=lambda f: f['cik'])
            for g in groups:
                yield (g[0], list(g[1]))
        else:
            for f in facts:
                yield f
        log_info("fetched chunk", (offset // max_batch_size + 1), '/',
                 math.ceil(len(ciks) / max_batch_size),
                 math.ceil(time.time() - t), 's')

    log_info("facts loaded.")
Example No. 18
def check_correlation(portfolio_history, data, print_stack_trace=True):
    """ Checks correlation for current output. """
    track_event("CHECK_CORRELATION")
    portfolio_history = output_normalize(portfolio_history)
    rr = calc_relative_return(data, portfolio_history)

    try:
        cr_list = calc_correlation(rr, False)
    except:
        import logging
        if print_stack_trace:
            logging.exception("Correlation check failed.")
        else:
            log_err("Correlation check failed.")
        return

    log_info()

    if len(cr_list) == 0:
        log_info("Ok. This strategy does not correlate with other strategies.")
        return

    log_err(
        "WARNING! This strategy correlates with other strategies and will be rejected."
    )
    log_err("Modify the strategy to produce the different output.")
    log_info(
        "The number of systems with a larger Sharpe ratio and correlation larger than 0.9:",
        len(cr_list))
    log_info(
        "The max correlation value (with systems with a larger Sharpe ratio):",
        max([i['cofactor'] for i in cr_list]))
    my_cr = [i for i in cr_list if i['my']]

    log_info(
        "Current sharpe ratio(3y):",
        calc_sharpe_ratio_annualized(rr,
                                     calc_avg_points_per_year(data) *
                                     3)[-1].values.item())

    log_info()

    if len(my_cr) > 0:
        log_info("My correlated submissions:\n")
        headers = ['Name', "Coefficient", "Sharpe ratio"]
        rows = []

        for i in my_cr:
            rows.append([i['name'], i['cofactor'], i['sharpe_ratio']])

        log_info(tabulate(rows, headers))

    ex_cr = [i for i in cr_list if i['template']]
    if len(ex_cr) > 0:
        log_info("Correlated examples:\n")
        headers = ['Name', "Coefficient", "Sharpe ratio"]
        rows = []

        for i in ex_cr:
            rows.append([i['name'], i['cofactor'], i['sharpe_ratio']])

        log_info(tabulate(rows, headers))
Example No. 19
def check_exposure(portfolio_history,
                   soft_limit=0.05,
                   hard_limit=0.1,
                   days_tolerance=0.02,
                   excess_tolerance=0.02,
                   avg_period=252,
                   check_period=252 * 5):
    """
    Checks exposure according to the submission filters.
    :param portfolio_history: output DataArray
    :param soft_limit: soft limit for exposure
    :param hard_limit: hard limit for exposure
    :param days_tolerance: allowed proportion of days when the exposure may be in the range 0.05..0.1
    :param excess_tolerance: max allowed average excess
    :param avg_period: period for the ratio calculation
    :param check_period: period for checking
    :return:
    """
    portfolio_history = portfolio_history.loc[{
        ds.TIME:
        np.sort(portfolio_history.coords[ds.TIME])
    }]

    exposure = calc_exposure(portfolio_history)
    max_exposure = exposure.max(ds.ASSET)

    max_exposure_over_limit = max_exposure.where(
        max_exposure > soft_limit).dropna(ds.TIME)
    if len(max_exposure_over_limit) > 0:
        max_exposure_asset = exposure.sel({
            ds.TIME:
            max_exposure_over_limit.coords[ds.TIME]
        }).idxmax(ds.ASSET)
        log_info("Positions with max exposure over the limit:")
        pos = xr.concat([max_exposure_over_limit, max_exposure_asset],
                        pd.Index(['exposure', 'asset'], name='field'))
        log_info(pos.to_pandas().T)

    periods = min(avg_period, len(portfolio_history.coords[ds.TIME]))

    bad_days = xr.where(max_exposure > soft_limit, 1.0, 0.0)
    bad_days_proportion = bad_days[-check_period:].rolling(dim={
        ds.TIME: periods
    }).mean()
    days_ok = xr.where(bad_days_proportion > days_tolerance, 1,
                       0).sum().values == 0

    excess = exposure - soft_limit
    excess = excess.where(excess > 0, 0).sum(ds.ASSET)
    excess = excess[-check_period:].rolling(dim={ds.TIME: periods}).mean()
    excess_ok = xr.where(excess > excess_tolerance, 1, 0).sum().values == 0

    hard_limit_ok = xr.where(max_exposure > hard_limit, 1, 0).sum().values == 0

    if hard_limit_ok and (days_ok or excess_ok):
        log_info("Ok. The exposure check succeed.")
        return True
    else:
        log_err("WARNING! The exposure check failed.")
        log_info("Hard limit check: ", 'Ok.' if hard_limit_ok else 'Failed.')
        log_info("Days check: ", 'Ok.' if days_ok else 'Failed.')
        log_info("Excess check:", 'Ok.' if excess_ok else 'Failed.')
        return False
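
A toy invocation (a sketch assuming ds.TIME == 'time', ds.ASSET == 'asset', and that calc_exposure normalizes positions per day; 25 equally weighted assets keep each exposure at 4%, under the 5% soft limit):

import numpy as np
import pandas as pd
import xarray as xr

weights = xr.DataArray(np.ones((252, 25)),
                       dims=["time", "asset"],
                       coords={"time": pd.date_range("2020-01-01", periods=252),
                               "asset": ["A%02d" % i for i in range(25)]})
check_exposure(weights)   # expected to report that the exposure check passed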
Example No. 20
                     forward_order=True)
    data_pd = (data.sel(field="high").to_pandas(),
               data.sel(field="low").to_pandas(),
               data.sel(field="close").to_pandas())

    t1 = time.time()

    adx1 = ADX(data, 14)

    t2 = time.time()

    _plus_di, _minus_di, _adx, _adxr = dms(data.sel(field="high"),
                                           data.sel(field="low"),
                                           data.sel(field="close"), 14, 14, 14)

    t3 = time.time()
    (pd_res) = dms(data_pd[0], data_pd[1], data_pd[2], 14, 14, 14)
    t4 = time.time()

    log_info(t2 - t1, t3 - t2, t4 - t3, "relative delta =",
             abs(adx1 - _adx).mean().values, "t(talib)/t(this) =",
             (t2 - t1) / (t3 - t2))

    import matplotlib.pyplot as plt

    plt.plot(adx1.coords[ds.TIME].values,
             adx1.sel(asset='NASDAQ:AAPL').values, 'r')
    plt.plot(_adx.coords[ds.TIME].values,
             _adx.sel(asset='NASDAQ:AAPL').values, 'g')
    plt.show()
Example No. 21
def clean(output, data, kind=None, debug=True):
    """
    Checks the output and fixes common errors:
        - liquidity
        - missed dates
        - exposure
        - normalization
    :param output:
    :param data:
    :param kind:
    :return:
    """
    import qnt.stats as qns
    import qnt.exposure as qne
    from qnt.data.common import ds, f, track_event

    if kind is None:
        kind = data.name

    output = output.drop(ds.FIELD, errors='ignore')

    with LogSettings(err2info=True):
        log_info("Output cleaning...")

        single_day = ds.TIME not in output.dims

        if not single_day:
            track_event("OUTPUT_CLEAN")

        if single_day:
            output = output.drop(ds.TIME, errors='ignore')
            output = xr.concat([output], pd.Index([data.coords[ds.TIME].values.max()], name=ds.TIME))
        else:
            log_info("ffill if the current price is None...")
            output = output.fillna(0)
            output = output.where(np.isfinite(data.sel(field='close')))
            output = output.ffill('time')
            output = output.fillna(0)

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check liquidity...")
            non_liquid = qns.calc_non_liquid(data, output)
            if len(non_liquid.coords[ds.TIME]) > 0:
                log_info("WARNING! Strategy trades non-liquid assets.")
                log_info("Fix liquidity...")
                is_liquid = data.sel(field=f.IS_LIQUID)
                is_liquid = xr.align(is_liquid, output, join='right')[0]
                output = xr.where(is_liquid == 0, 0, output)
            log_info("Ok.")

        if not single_day:
            log_info("Check missed dates...")
            missed_dates = qns.find_missed_dates(output, data)
            if len(missed_dates) > 0:
                log_info("WARNING! Output contain missed dates.")
                log_info("Adding missed dates and set zero...")
                add = xr.concat([output.isel(time=-1)] * len(missed_dates), pd.DatetimeIndex(missed_dates, name="time"))
                add = xr.full_like(add, np.nan)
                output = xr.concat([output, add], dim='time')
                output = output.fillna(0)
                if kind == "stocks" or kind == "stocks_long":
                    output = output.where(data.sel(field='is_liquid') > 0)
                output = output.dropna('asset', 'all').dropna('time', 'all').fillna(0)
                output = normalize(output)
            else:
                log_info("Ok.")

        if kind == 'stocks_long':
            log_info("Check positive positions...")
            neg = output.where(output < 0).dropna(ds.TIME, 'all')
            if len(neg.time) > 0:
                log_info("WARNING! Output contains negative positions. Clean...")
                output = output.where(output >= 0).fillna(0)
            else:
                log_info("Ok.")

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check exposure...")
            if not qns.check_exposure(output):
                log_info("Cut big positions...")
                output = qne.cut_big_positions(output)
                log_info("Check exposure...")
                if not qns.check_exposure(output):
                    log_info("Drop bad days...")
                    output = qne.drop_bad_days(output)

        if kind == "crypto":
            log_info("Check BTC...")
            if output.where(output != 0).dropna("asset", "all").coords[ds.ASSET].values.tolist() != ['BTC']:
                log_info("WARNING! Output contains not only BTC.")
                log_info("Fixing...")
                output=output.sel(asset=['BTC'])
            else:
                log_info("Ok.")

        log_info("Normalization...")
        output = normalize(output)
        log_info("Output cleaning is complete.")

    return output
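
Typical usage (a sketch; write() is the output writer from Example No. 13 and check() is the checker from Example No. 23):

output = clean(output, data, kind="stocks")   # fix liquidity, missed dates, exposure; normalize
check(output, data, kind="stocks")            # warn about any remaining issues
write(output)                                 # save the weights for submission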
Example No. 22
    url = tracking_host + '/engine/tracklib?apiKey=' + api_key + '&event=' + event
    if 'STRATEGY_ID' in os.environ:
        url = url + '&strategyId=' + os.environ.get('STRATEGY_ID', '')
    t = threading.Thread(target=get_url_silent, args=(url,))
    t.start()


def get_url_silent(url):
    try:
        urllib.request.urlopen(url)
    except:
        pass


if __name__ == '__main__':
    log_info(parse_max_datetime_from_url('http://hl.datarelay:7070/last/2020-10-07T10/'))
    log_info(parse_max_datetime_from_url('http://hl.datarelay:7070/last/2016-10-28/'))
    # t = parse_max_datetime_from_url('http://hl.datarelay:7070/last/2020-10-07T10/')
    # print(datetime.datetime.combine(t.date(), datetime.time.min))


# TODO Strange stuff, need to check usage

def from_xarray_3d_to_dict_of_pandas_df(xarray_data):
    assets_names = xarray_data.coords[ds.ASSET].values
    pandas_df_dict = {}
    for asset_name in assets_names:
        pandas_df_dict[asset_name] = xarray_data.loc[:, :, asset_name].to_pandas()

    return pandas_df_dict
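
A toy call (a sketch; it assumes ds.FIELD/ds.TIME/ds.ASSET resolve to 'field'/'time'/'asset' and that the array uses that dimension order):

import numpy as np
import pandas as pd
import xarray as xr

arr = xr.DataArray(np.arange(8, dtype=float).reshape(2, 2, 2),
                   dims=["field", "time", "asset"],
                   coords={"field": ["open", "close"],
                           "time": pd.date_range("2021-01-01", periods=2),
                           "asset": ["A", "B"]})
dfs = from_xarray_3d_to_dict_of_pandas_df(arr)
print(dfs["A"])   # a (field x time) DataFrame for asset "A"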
Example No. 23
def check(output, data, kind=None):
    """
    This function checks your output and warns you if it contains errors.
    :return:
    """
    import qnt.stats as qns
    from qnt.data.common import ds, f, get_env, track_event

    if kind is None:
        kind = data.name

    single_day = ds.TIME not in output.dims
    if single_day:
        output = xr.concat([output], pd.Index([data.coords[ds.TIME].values.max()], name=ds.TIME))

    try:
        if kind == "stocks" or kind == "stocks_long":
            log_info("Check liquidity...")
            non_liquid = qns.calc_non_liquid(data, output)
            if len(non_liquid.coords[ds.TIME]) > 0:
                log_err("ERROR! Strategy trades non-liquid assets.")
                log_err("Multiply the output by data.sel(field='is_liquid') or use qnt.output.clean")
            else:
                log_info("Ok.")

        if not single_day:
            log_info("Check missed dates...")
            missed_dates = qns.find_missed_dates(output, data)
            if len(missed_dates) > 0:
                log_err("ERROR! Some dates were missed)")
                log_err("Your strategy dropped some days, your strategy should produce a continuous series.")
            else:
                log_info("Ok.")
            track_event("OUTPUT_CHECK")

        if kind == "stocks" or kind == "stocks_long":
            log_info("Check exposure...")
            if not qns.check_exposure(output):
                log_err("Use more assets or/and use qnt.output.clean")

        if kind == "crypto":
            log_info("Check BTC...")
            if output.where(output != 0).dropna("asset", "all").coords[ds.ASSET].values.tolist() != ['BTC']:
                log_err("ERROR! Output contains not only BTC.\n")
                log_err("Remove the other assets from the output or use qnt.output.clean")
            else:
                log_info("Ok.")

        if not single_day:
            if abs(output).sum() == 0:
                log_err("ERROR! Output is empty. All positions are zero.")
            else:
                # if kind == 'crypto' or kind == 'cryptofutures' or kind == 'crypto_futures':
                #     log_info("Check holding time...")
                #     ht = qns.calc_avg_holding_time(output)
                #     ht = ht.isel(time=-1).values
                #     if ht < 4:
                #         log_err("ERROR! The holding time is too low.", ht, "<", 4)
                #     else:
                #         log_info("Ok.")
                #
                # if kind == 'stocks_long':
                #     log_info("Check holding time...")
                #     ht = qns.calc_avg_holding_time(output)
                #     ht = ht.isel(time=-1).values
                #     if ht < 15:
                #         log_err("ERROR! The holding time is too low.", ht, "<", 15)
                #     else:
                #         log_info("Ok.")

                if kind == 'stocks_long':
                    log_info("Check positive positions...")
                    neg = output.where(output < 0).dropna(ds.TIME, 'all')
                    if len(neg.time) > 0:
                        log_err("ERROR! Output contains negative positions.")
                        log_err("Drop all negative positions.")
                    else:
                        log_info("Ok.")

                log_info("Check the sharpe ratio...")

                sr = calc_sharpe_ratio_for_check(data, output, kind, True)
                log_info("Sharpe Ratio =", sr)

                if sr < 1:
                    log_err("ERROR! The Sharpe Ratio is too low.", sr, '<', 1,)
                    log_err("Improve the strategy and make sure that the in-sample Sharpe Ratio more than 1.")
                else:
                    log_info("Ok.")

                log_info("Check correlation.")
                qns.check_correlation(output, data, False)
    except Exception as e:
        log_err(e)
Example No. 24
def load_origin_data(assets=None,
                     min_date=None,
                     max_date=None,
                     tail: tp.Union[datetime.timedelta, float, int] = 4 * 365):
    track_event("DATA_STOCKS_SERIES")
    setup_ids()

    if assets is not None:
        assets = [a['id'] if type(a) == dict else a for a in assets]

    if assets is None:
        assets_array = load_list(min_date=min_date,
                                 max_date=max_date,
                                 tail=tail)
        assets_arg = [a['id'] for a in assets_array]
    else:
        assets_arg = assets
    assets_arg = [idt.translate_user_id_to_server_id(id) for id in assets_arg]

    assets_arg = list(set(assets_arg))  # rm duplicates

    # load data from server
    if max_date is None and "LAST_DATA_PATH" in os.environ:
        whole_data_file_flag_name = get_env("LAST_DATA_PATH", "last_data.txt")
        with open(whole_data_file_flag_name, "w") as text_file:
            text_file.write("last")

    max_date = parse_date(max_date)

    if min_date is not None:
        min_date = parse_date(min_date)
    else:
        min_date = max_date - parse_tail(tail)

    # print(str(max_date))

    if min_date > max_date:
        raise Exception("min_date must be less than or equal to max_date")

    start_time = time.time()

    days = (max_date - min_date).days + 1
    chunk_asset_count = math.floor(BATCH_LIMIT / days)

    chunks = []
    assets_arg.sort()

    for offset in range(0, len(assets_arg), chunk_asset_count):
        chunk_assets = assets_arg[offset:(offset + chunk_asset_count)]
        chunk = load_origin_data_chunk(chunk_assets, min_date.isoformat(),
                                       max_date.isoformat())
        if chunk is not None:
            chunks.append(chunk)
        log_info("fetched chunk " +
                 str(round(offset / chunk_asset_count + 1)) + "/" +
                 str(math.ceil(len(assets_arg) / chunk_asset_count)) + " " +
                 str(round(time.time() - start_time)) + "s")

    fields = [
        f.OPEN, f.LOW, f.HIGH, f.CLOSE, f.VOL, f.DIVS, f.SPLIT,
        f.SPLIT_CUMPROD, f.IS_LIQUID
    ]
    if len(chunks) == 0:
        whole = xr.DataArray([[[np.nan]]] * len(fields),
                             dims=[ds.FIELD, ds.TIME, ds.ASSET],
                             coords={
                                 ds.FIELD: fields,
                                 ds.TIME: pd.DatetimeIndex([max_date]),
                                 ds.ASSET: ['ignore']
                             })[:, 1:, 1:]
    else:
        whole = xr.concat(chunks, ds.ASSET)

    whole.coords[ds.ASSET] = [
        idt.translate_server_id_to_user_id(id)
        for id in whole.coords[ds.ASSET].values
    ]

    if assets is not None:
        assets = sorted(assets)
        assets = xr.DataArray(assets,
                              dims=[ds.ASSET],
                              coords={ds.ASSET: assets})
        whole = whole.broadcast_like(assets)

    whole = whole.transpose(ds.FIELD, ds.TIME, ds.ASSET)
    whole = whole.loc[fields,
                      np.sort(whole.coords[ds.TIME])[::-1],
                      np.sort(whole.coords[ds.ASSET])]

    return whole.dropna(ds.TIME, 'all')
Example No. 25
        return lwma(series, weights)
    if type(weights) is list:
        weights = np.array(weights, np.float64)
    return nda.nd_universal_adapter(wma_np_1d, (series, ), (weights, ))


def lwma(series: nda.NdType, periods: int = 20):
    return nda.nd_universal_adapter(lwma_np_1d, (series, ), (periods, ))


def vwma(price: nda.NdType, volume: nda.NdType, periods: int = 20):
    return nda.nd_universal_adapter(vwma_np_1d, (price, volume), (periods, ))


if __name__ == '__main__':
    log_info(np.divide(1., 0.))

    d1_array = np.array([0, 1, 2, 3, 4, np.nan, 5, np.nan, 6, 7], np.double)
    d1_result_lwma = lwma(d1_array, 3)
    d1_result_wma = wma(d1_array, [3, 2, 1])
    d1_result_vwma = vwma(d1_array, d1_array, 3)
    log_info("d1_array:\n", d1_array, '\n')
    log_info('d1_result_lwma:\n', d1_result_lwma)
    log_info('d1_result_wma:\n', d1_result_wma)
    log_info('d1_result_vwma:\n', d1_result_vwma)
    log_info('---')

    np_array = np.array([[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ], [
Example No. 26
def tema(series: nda.NdType,
         periods: int = 20,
         warm_periods: tp.Union[int, None] = None) -> nda.NdType:
    """
    Triple Exponential Moving Average
    """
    ma = ema(series, periods, warm_periods)
    ma = ema(ma, periods, warm_periods)
    ma = ema(ma, periods, warm_periods)
    return ma


if __name__ == '__main__':
    d1_array = np.array([0, 1, 2, 3, 4, np.nan, 5, np.nan, 6, 7], np.double)
    d1_result = ema(d1_array, 3)
    log_info("d1_array:\n", d1_array, '\n')
    log_info('d1_result:\n', d1_result)
    log_info('---')

    date_rng = pd.date_range(start='2018-01-01', end='2018-01-10', freq='D')
    series_in = pd.Series(d1_array, date_rng)
    series_out = ema(series_in, 3)
    log_info("series_in:\n", series_in, '\n')
    log_info('series_out:\n', series_out)
    log_info('---')

    np_array = np.array([[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ], [
        [2, 3, 4, 5],
Example No. 27
    :return:
    """
    if isinstance(prices, np.ndarray):
        return ad_line_np(prices)
    return nd_to_1d_universal_adapter(ad_line_np, (prices, ), ())


if __name__ == '__main__':
    from qnt.data import load_data, load_assets, ds

    assets = load_assets()
    ids = [i['id'] for i in assets[0:2000]]

    data = load_data(assets=ids,
                     dims=(ds.TIME, ds.ASSET, ds.FIELD),
                     forward_order=True).sel(field="close")

    adr = ad_ratio(data)
    adr_pd = ad_ratio(data.to_pandas())
    log_info(adr.to_pandas() - adr_pd.T)

    adl = ad_line(data)

    import matplotlib.pyplot as plt

    plt.plot(adr.coords[ds.TIME].values, adr.values, 'r')
    plt.show()

    plt.plot(adl.coords[ds.TIME].values, adl.values, 'g')
    plt.show()
Example No. 28
def evaluate_passes(data_type='stocks', passes=3, dates=None):

    log_info("Output directory is:", result_dir)
    os.makedirs(result_dir, exist_ok=True)

    log_info("Rm previous results...")
    for i in os.listdir(result_dir):
        fn = result_dir + "/" + i
        if os.path.isfile(fn):
            log_info("rm:", fn)
            os.remove(fn)

    if dates is None:
        log_info("Prepare test dates...")
        min_date = (pd.Timestamp(
            qnt.stats.get_default_is_start_date_for_type(data_type))
                    ).to_pydatetime()
        data = qnt.data.load_data_by_type(data_type, min_date=min_date)
        if 'is_liquid' in data.field:
            data = data.where(data.sel(field='is_liquid') > 0).dropna(
                'time', 'all')
        data = data.time
        dates = [data.isel(time=-1).values, data.isel(time=1).values] \
                + [data.isel(time=round(len(data) * (i+1)/(passes-1))).values for i in range(passes-2)]
        dates = list(set(dates))
        dates.sort()
        dates = [pd.Timestamp(i).date() for i in dates]

        del data
    else:
        dates = [qnt.data.common.parse_date(d) for d in dates]

    log_info("Dates:", *(i.isoformat() for i in dates))

    i = 0
    for date in dates:
        try:
            os.remove(fractions_fn)
        except FileNotFoundError:
            pass
        try:
            os.remove(last_data_fn)
        except FileNotFoundError:
            pass
        try:
            os.remove(html_fn)
        except FileNotFoundError:
            pass

        log_info("---")
        i += 1
        log_info("pass:"******"/", len(dates), "max_date:", date.isoformat())

        if data_type == 'stocks' or data_type == 'stocks_long':
            timeout = 30 * 60
        if data_type == 'futures':
            timeout = 10 * 60
        if data_type == 'crypto' or data_type == 'crypto_futures' or data_type == 'cryptofutures':
            timeout = 5 * 60

        data_url = urllib.parse.urljoin(
            urllib.parse.urljoin(qnt.data.common.BASE_URL, 'last/'),
            date.isoformat()) + "/"
        cmd = "DATA_BASE_URL=" + data_url + " \\\n" + \
              "LAST_DATA_PATH=" + last_data_fn + " \\\n" + \
              "OUTPUT_PATH=" + fractions_fn + " \\\n" + \
              "SUBMISSION_ID=-1\\\n" + \
              " jupyter nbconvert --to html --ExecutePreprocessor.timeout=" + str(timeout)+ " --execute strategy.ipynb --output=" + html_fn  # + \
        # "\\\n 2>&1"
        log_info("cmd:", cmd)
        log_info("output:")
        proc = subprocess.Popen(cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                executable='bash')
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            sys.stdout.write(line)
        proc.wait()
        code = proc.returncode
        log_info("return code:", code)

        if not os.path.exists(fractions_fn):
            log_err("ERROR! Output is not found.")
        if not os.path.exists(last_data_fn):
            log_err("ERROR! The strategy does not use all data.")
        if not os.path.exists(html_fn):
            log_err("ERROR! Conversion to html failed.")
        if code != 0:
            log_err("ERROR! Return code != 0.")

        if os.path.exists(fractions_fn):
            log_info("Check the output...")
            output = load_output(fractions_fn, date)

            if data_type == 'stocks' or data_type == 'stocks_long':
                qnt.stats.check_exposure(output)

            log_info("Load data...")
            data = qnt.data.load_data_by_type(
                data_type,
                assets=output.asset.values.tolist(),
                min_date=str(output.time.min().values)[:10],
                max_date=date)

            if data_type == 'stocks' or data_type == 'stocks_long':
                non_liquid = qnt.stats.calc_non_liquid(data, output)
                if len(non_liquid.time) > 0:
                    log_err("ERROR! The output contains illiquid positions.")

            missed = qnt.stats.find_missed_dates(output, data)
            if len(missed) > 0:
                log_err("ERROR: some dates are missed in the output.", missed)
            else:
                log_info("There are no missed dates.")

            del data

        try:
            shutil.move(
                fractions_fn,
                result_dir + "/" + date.isoformat() + ".fractions.nc.gz")
        except FileNotFoundError:
            pass
        try:
            shutil.move(last_data_fn,
                        result_dir + "/" + date.isoformat() + ".last_data.txt")
        except FileNotFoundError:
            pass
        try:
            shutil.move(html_fn,
                        result_dir + "/" + date.isoformat() + ".strategy.html")
        except FileNotFoundError:
            pass

    log_info("---")
    log_info("Evaluation complete.")
Example No. 29
import numpy as np
import numba as nb
import qnt.ta.ndadapter as nda
from qnt.log import log_info, log_err


@nb.jit(nb.float64[:](nb.float64[:], nb.int64), nopython=True)
def shift_np_1d(series: np.ndarray, periods: int) -> np.ndarray:
    if periods < 0:
        return np.flip(shift_np_1d(np.flip(series), -periods))
    tail = np.empty((periods + 1,), dtype=np.double)
    not_nan_cnt = 0
    result = np.full(series.shape, np.nan, dtype=np.double)
    for i in range(series.shape[0]):
        if not np.isnan(series[i]):
            idx = not_nan_cnt % tail.shape[0]
            tail[idx] = series[i]
            if not_nan_cnt >= periods:
                result[i] = tail[idx - periods]
            not_nan_cnt += 1
    return result


def shift(series: nda.NdType, periods: int = 1) -> nda.NdType:
    return nda.nd_universal_adapter(shift_np_1d, (series,), (periods,))


if __name__ == "__main__":
    arr = np.array([1, 2, np.nan, 4, 5, np.nan, 7, 9, 0], np.double)
    sh = shift(arr, 2)
    log_info(sh)
Example No. 30
        return None
    arr = xr.open_dataarray(raw, cache=False, decode_times=True)
    arr = arr.compute()
    return arr


FIRST = True


def setup_ids():
    global FIRST
    if idt.USE_ID_TRANSLATION and FIRST:
        js = request_with_retry('assets', None)
        js = js.decode()
        tickers = json.loads(js)
        idt.USE_ID_TRANSLATION = next(
            (i for i in tickers if i.get('FIGI') is not None),
            None) is not None
        FIRST = False


if __name__ == '__main__':
    # import qnt.id_translation
    # qnt.id_translation.USE_ID_TRANSLATION = False
    assets = load_list()
    log_info(len(assets))
    ids = [i['id'] for i in assets]
    log_info(ids)
    data = load_data(min_date='1998-11-09', assets=ids[-2000:])
    log_info(data.sel(field='close').transpose().to_pandas())