Esempio n. 1
0
def try_fix_missing(ticker):
    """Attempt to fix missing values in a market DataFrame by using rules.

    The frame is loaded with ``odu.get_market_df``, repaired row by row and
    written back with ``odu.save_market_df``. The rules look at the
    Open/High/Low/Close fields of each row:

    * Rule 1: if three of the four fields are missing, the NaN values of the row
      are filled with the single value that is present.
    * Rule 2: if exactly Open and Close are missing, Open is set to Low and
      Close is set to High.

    :param ticker: str - Customized ticker
    :return: Number of missing values left
    """
    log = logging.getLogger(__name__)

    # Get DataFrame
    df = odu.get_market_df(ticker)
    before = df.isnull().sum().sum()
    log.debug('Missing values before: {}'.format(before))

    fields = ['Open', 'High', 'Low', 'Close']
    for idx in range(len(df)):
        row = df.iloc[idx]
        rvals = row[fields]
        missing = rvals.isnull().sum()
        if missing == 3:
            # Rule 1: If 3 missing values, then all values should be equal!
            # .iloc[0] instead of [0]: the Series is labelled by field name, so an
            # integer key relies on the deprecated positional fallback.
            value = rvals.dropna().iloc[0]
            # NOTE(review): fillna runs on the whole row, so NaNs in columns other
            # than OHLC are filled with the price too - confirm this is intended.
            df.iloc[idx] = row.fillna(value)
        elif missing == 2 and math.isnan(row['Open']) and math.isnan(row['Close']):
            # Rule 2: If Open and Close are missing, then Low goes to Open and High to Close
            # Write with a single iloc call: df.iloc[idx]['Open'] = ... assigns to a
            # temporary row object and leaves the DataFrame untouched.
            df.iloc[idx, df.columns.get_loc('Open')] = row['Low']
            df.iloc[idx, df.columns.get_loc('Close')] = row['High']
    # Save DataFrame
    odu.save_market_df(ticker, df)
    after = df.isnull().sum().sum()
    log.warning('Missing values after: {} - fixed: {}'.format(after, before - after))

    return after
Esempio n. 2
0
def calculate_stds(df, length=20):
    """Build a per-spread volatility table for the specified universe (meant for use in Excel).

    For each row of the universe the standard deviation of the last ``length``
    Close values is computed (0 when no market data is available) and combined
    with the latest HPS/HPH values of the COT data for the ticker stem.

    :param df: DataFrame universe of traded symbols (Ticker, LastDate and DaysTo columns are read)
    :param length: Number of trailing rows used for the standard deviation
    :return: DataFrame with columns Ticker, LastDate, DaysTo, HPS, HPH, PVol and IVol
    """
    logger = logging.getLogger(__name__)
    columns = ['Ticker', 'LastDate', 'DaysTo', 'HPS', 'HPH', 'PVol', 'IVol']

    records = []
    for _, entry in df.iterrows():
        symbol = entry['Ticker']
        stem = symbol[0:2]
        logger.debug('Process spread: {}'.format(symbol))
        # Price volatility: std of the trailing closes, 0 when there is no data
        market = odu.get_market_df(symbol)
        price_vol = 0 if market is None else market.tail(length)['Close'].std()
        # Latest COT values for the stem
        cot = odc.cot_data(stem)
        latest_hps = cot.iloc[-1]['HPS']
        latest_hph = cot.iloc[-1]['HPH']
        records.append({
            'Ticker': symbol,
            'LastDate': entry['LastDate'],
            'DaysTo': entry['DaysTo'],
            'HPS': latest_hps,
            'HPH': latest_hph,
            'PVol': price_vol,
            # Price volatility scaled by the 'Point' setting of the stem
            'IVol': price_vol * i.get(stem, 'Point'),
        })
    return pd.DataFrame(records, columns=columns)
Esempio n. 3
0
def test_roll_yield():
    """Check that the roll yield of a spread is a Series that can be attached to its market frame."""
    symbol = 'LBS2H18'
    market = odu.get_market_df(symbol)
    roll = odu.roll_yield(symbol, market)
    assert isinstance(roll, pd.Series)
    # Plain alias (no copy): the new column is added to the market frame itself
    augmented = market
    augmented['RollYield'] = roll
    assert isinstance(augmented, pd.DataFrame)
    # Adding the column must leave both ends of the index unchanged
    for position in (0, -1):
        assert market.index[position] == augmented.index[position]
    for preview in (augmented.head(), augmented.tail()):
        print(preview)
Esempio n. 4
0
    def initialize_data(self, days_back=90, extra_days=True, partial=False):
        """Get an ordered dictionary of DataFrames with the corresponding data.

        Every ticker of ``self.chain`` is loaded with ``odu.get_market_df``. Tickers
        without data, or with fewer than ``days_back`` rows when ``partial`` is not
        set, are removed from ``self.chain`` and ``self.contracts``. The result is
        stored in ``self.data`` and the chain index is reset.

        :param days_back: int - Number of days back for the data (default: 90)
        :param extra_days: bool - Option to add a day before and the last day (LTD or FND) - Used to load data
        :param partial: bool - Option to load data even if length is smaller than days_back
        :return: OrderedDict - Ticker -> DataFrame, in chain order
        """
        self.days_back = days_back
        if self.chain is None:
            self.initialize_contracts(Status.Expired)
        # Get all the dataframes
        dfs = co.OrderedDict()
        for index, row in self.chain.iterrows():
            ticker = row['Ticker']
            sdf = odu.get_market_df(ticker)
            if sdf is None or (len(sdf) < days_back and not partial):
                # Remove from the contracts and the chain DataFrame
                self.chain.drop(index, inplace=True)
                self.contracts.remove(ticker)
                self.log.warning(
                    'File not found or not enough data for: {}!'.format(ticker))
                continue
            # Roll Yield for Spreads
            if self.future_type == oci.FutureType.Spread:
                sdf['RollYield'] = odu.roll_yield(ticker, sdf)
            last_date = pd.to_datetime(row['LastDate'])
            if extra_days:
                sdf = sdf[sdf.index <= last_date]
                selected = sdf.iloc[-(days_back + 1):]
            else:
                # LTD or FND should be excluded as they can be volatile (and can't trade FND)
                sdf = sdf[sdf.index < last_date]
                selected = sdf.iloc[-days_back:]
            # Find start date
            if self.status == Status.ActiveLive:
                start = odu.get_start_date(row['LastDate'], days_back=days_back)
                selected = sdf[sdf.index >= start]
                if len(selected) == 0:
                    # Remove from the contracts and the chain DataFrame. The ticker is
                    # deliberately not stored in dfs: keeping an entry for a removed
                    # contract would leave self.data out of sync with the chain.
                    self.chain.drop(index, inplace=True)
                    self.contracts.remove(ticker)
                    self.log.warning('Removing {} as too early!'.format(ticker))
                    continue
            dfs[ticker] = selected

        self.chain.reset_index(inplace=True, drop=True)
        self.data = dfs
        return self.data
Esempio n. 5
0
def download_list(status, symbols=None, interval='daily', override_last=False):
    """Download the data of every ticker generated for a list of symbols.

    For each ticker the stored market DataFrame is looked up first. When its last
    date is today the download is skipped (unless ``override_last`` is set);
    otherwise data is requested from three business days before the last stored
    date (or from 1900-01-01 when nothing is stored) up to today, and saved.

    :param status: Enum Status - Active/ActivePlus/All/Expired Status
    :param symbols: List of symbols to download (Stem should be provided) - If not provided, will go through the universe
    :param interval: String data interval, possible values: 'minute', 'daily' or both
    :param override_last: bool - To force re-downloading data for current day
    """
    logger = logging.getLogger(__name__)
    date_fmt = '%Y-%m-%d'

    logger.info('Download {}'.format(interval))
    if symbols is None:
        symbols = oci.json_db
    for stem in symbols:
        logger.debug('Download data for {}'.format(stem))
        # Generate tickers for download
        tickers = ocl.generate_tickers_df(stem, status)
        for _, entry in tickers.iterrows():
            ticker = entry['Ticker']
            # Work out the last stored date and where the download should start
            stored = odu.get_market_df(ticker)
            if stored is None:
                last_date = '1900-01-01'
                start = '1900-01-01'
            else:
                last_date = dt.datetime.strftime(stored.index[-1], date_fmt)
                start = dt.datetime.strftime(stored.index[-1] - o.BDay(3), date_fmt)
            up_to_date = last_date == dt.datetime.today().strftime(date_fmt)
            if up_to_date and not override_last:
                logger.info(
                    'Do not download {} as it has already been downloaded'.format(ticker))
                continue
            # Download Data & Save to the file
            downloaded = get_ohlcv_data(ticker, interval, start,
                                        dt.datetime.today().strftime(date_fmt))
            if downloaded is not None:
                odu.save_market_df(ticker, downloaded)
Esempio n. 6
0
def inspect_files(stems=None, future_type=None):
    """Walk the datafiles of the Daily folder and report the tickers with missing values.

    The ticker is the part of the file name before the first dot; its first two
    characters are compared against ``stems``.

    :param stems: list - List of Futures to inspect (no filtering when None)
    :param future_type: enum FutureType - Type of Futures to inspect (Outright, Spread, etc...)
    :return: dict - Problematic tickers to be checked, mapped to their number of missing values
    """
    log = logging.getLogger(__name__)

    daily_dir = op.join(oc.cfg['default']['data'], 'Daily')

    def _selected_tickers():
        # Lazily yield the tickers that pass the stem and future type filters
        for _root, _dirs, filenames in os.walk(daily_dir):
            for filename in filenames:
                name = filename.partition('.')[0]
                if stems is not None and name[0:2] not in stems:
                    continue
                if future_type is not None and oci.get_future_type(name) != future_type:
                    continue
                yield name

    problems = {}
    for ticker in _selected_tickers():
        frame = odu.get_market_df(ticker)
        if frame is None:
            continue
        nulls = frame.isnull().sum().sum()
        if nulls > 0:
            problems[ticker] = nulls

    return problems
Esempio n. 7
0
def get_forecasts_and_data():
    """Build the monthly ED/FF forecast dataset, starting from January 2016.

    For every month yielded by ``months_generator`` the ED contract month and the
    FF contract months are derived from ``rules_maturities``, the matching market
    DataFrames are loaded for a window of one month either side, and a forecast is
    computed from the last ED value and the average of the positive FF values.
    Months where the three FF values sum to zero are skipped.

    :return: DataFrame - concatenation of the monthly frames with columns ED, FF1, FF2, FF3 and FC
             (FC is the forecast minus ``liborff[date] * 100``); rows with NaN are dropped
    """
    date_fmt = '%Y-%m-%d'
    # Months for which the contract year rolls over to the next calendar year
    ed_next_year = ['09', '10', '11', '12']
    ff_next_year = ['06', '07', '08', '09', '10', '11', '12']

    start = dt.datetime(2016, 1, 1)
    liborff = calculate_liborffm()
    forecasts = []  # [date, forecast] pairs; collected but not part of the result
    frames = []
    for month in months_generator(start):
        date = month.strftime(date_fmt)
        month_key = month.strftime('%m')
        if debug:
            print('Process Month with ending: {}!'.format(date))
        # ED
        ed_rule = rules_maturities[month_key]['ED']
        ed_year = int(date[0:4]) + (1 if month_key in ed_next_year else 0)
        ed_ctrmth = int('{}{}'.format(ed_year, ed_rule))
        if debug:
            print('ED {} {}'.format(date, ed_ctrmth))
        # FF
        ff_rule = rules_maturities[month_key]['FF']
        ff_ctrmth = []
        for rule in ff_rule:
            ff_year = int(date[0:4]) + (1 if month_key in ff_next_year else 0)
            ff_ctrmth.append(int('{}{}'.format(ff_year, rule)))
        if debug:
            print('FF {} {}'.format(date, ff_ctrmth))
        # Get dataframes
        window_start = (month - rd.relativedelta(months=1)).strftime(date_fmt)
        window_end = (month + rd.relativedelta(months=1)).strftime(date_fmt)
        ed_ticker = '{}{}'.format('ED', i.ym_maturity(ed_ctrmth))
        ed_df = odu.get_market_df(ed_ticker, window_start, window_end)
        ff_df = []
        for contract_month in ff_ctrmth:
            ff_ticker = '{}{}'.format('FF', i.ym_maturity(contract_month))
            ff_df.append(odu.get_market_df(ff_ticker, window_start, window_end))
        # Calculate Forecast
        ed_value = get_last_value(ed_df, date, ed_ctrmth)
        if ed_value == 0:
            print('Null value for ED!')
        ff_values = [
            get_last_value(ff_df[pos], date, ff_ctrmth[pos]) for pos in range(0, 3)
        ]
        if sum(ff_values) == 0:
            print('All values are null for date: {} - see: {}!'.format(
                date, ff_values))
            continue
        # Only the positive FF values take part in the average
        ff_values = [value for value in ff_values if value > 0]
        ff_average = sum(ff_values) / len(ff_values)
        fc = (100 - ed_value) - (100 - ff_average)
        forecasts.append([date, fc])
        if debug:
            print('Values: {} {} - Forecast: {:.2f} - LIBORFF: {:.2f}'.format(
                ed_value, ff_values, fc, liborff[date] * 100))
        # Constant series (forecast minus LIBORFF) aligned on the ED index
        sfc = pd.Series(
            data=[(fc - liborff[date] * 100) for _ in range(len(ed_df.index))],
            index=ed_df.index)
        closes = [ed_df['Close']]
        closes.extend(ff_df[pos]['Close'] for pos in range(0, 3))
        closes.append(sfc)
        monthly = pd.concat(closes, axis=1)
        monthly.columns = ['ED', 'FF1', 'FF2', 'FF3', 'FC']
        monthly = monthly[monthly.index > date]
        if debug:
            print(monthly.head())
        frames.append(monthly.dropna())

    # DataFrame
    return pd.concat(frames)
Esempio n. 8
0
def test_fix_settlement():
    """Run fix_settlement on a sample ticker and print the result for inspection."""
    symbol = 'FCS1V12'
    market = odu.get_market_df(symbol)
    print(odd.fix_settlement(symbol, market, False))
Esempio n. 9
0
def test_save_market_df():
    """Load the market frame of a sample ticker and save it back."""
    symbol = 'FCF10'
    odu.save_market_df(symbol, odu.get_market_df(symbol))
Esempio n. 10
0
def test_get_market_df():
    """Check that a sample ticker is loaded as a DataFrame and print its first rows."""
    market = odu.get_market_df('EDM18')
    assert isinstance(market, pd.DataFrame)
    preview = market.head()
    print(preview)