Ejemplo n.º 1
0
def clean_seas_df_for_min_max_average(seas, range):
    """
    Given a seasonalised dataframe, clean to handle missing data
    :param seas:
    :return:
    """
    seas = seas.dropna(how='all', axis=1)
    seasf = seas.rename(columns=dates.find_year(seas))

    # only consider when we have full(er) data for a given range
    fulldata = pd.DataFrame(seasf.isna().sum())  # count non-na values
    if not (fulldata == 0).all(
    ).iloc[0]:  # line below doesn't apply when we have full data for all columns
        fulldata = fulldata[fulldata.apply(
            lambda x: np.abs(x - x.mean()) / x.std() < 1.5).all(
                axis=1)]  # filter columns with high emtply values
    seasf = seasf[fulldata.index]  # use these column names only

    if isinstance(range, int):
        end_year = dates.curyear - 1
        start_year = end_year - (range - 1)
    else:
        start_year, end_year = range[0], range[1]

    r = seasf[[x for x in seasf.columns if x >= start_year and x <= end_year]]
    return r
Ejemplo n.º 2
0
def reindex_year(df):
    """
    Reindex a dataframe containing prices to the current year.
    eg dataframe with brent Jan 19, Jan 18, Jan 17   so that 18 is shifted +1 year and 17 is shifted +2 years
    """
    dfs = []
    colyearmap = dates.find_year(df)
    for colname in df.columns:
        if df[colname].isnull().all():
            continue  # logic below wont work on all empty NaN columns

        # determine year
        colyear = colyearmap[colname]
        delta = dates.curyear - colyear
        w = df[[colname]]
        if delta == 0:
            dfs.append(w)
        else:  # reindex
            winew = [x + pd.DateOffset(years=delta) for x in w.index]
            w.index = winew
            dfs.append(w)

    # merge all series into one dataframe, concat doesn't quite do the job
    res = reduce(
        lambda left, right: pd.merge(
            left, right, left_index=True, right_index=True, how='outer'), dfs)
    res = res.dropna(how='all')  # drop uneeded columns out into future
    res = pandasutil.fillna_downbet(
        res)  # use this as above ffills incorrectly at end of timeseries

    return res
Ejemplo n.º 3
0
def reindex_year_df_rel_col(df):
    """
    Given a reindexed year dataframe, figure out which column to use for change summary
    Basic algorithm is use current year, unless you are 10 days from end of dataframe
    :param df:
    :return:
    """
    res_col = df.columns[0]

    years = dates.find_year(df)
    last_val_date = df.index[-1]

    colyears = [x for x in df if str(dates.curyear) in str(x)]
    if len(colyears) > 0:
        res_col = colyears[0]
        relyear = (pd.to_datetime('{}-01-01'.format(years.get(res_col)))
                   )  # year of this column

        dft = df[colyears].dropna()
        if len(dft) > 0:
            relcol_date = df[res_col].dropna().index[
                -1]  # last date of this column

            delta = last_val_date - relcol_date
            if delta.days < 10:
                relyear1 = (relyear + pd.DateOffset(years=1)).year
                relyear1 = [x for x in df if str(relyear1) in x]
                if len(relyear1) > 0:
                    return relyear1[0]
            else:
                return res_col

    return res_col
Ejemplo n.º 4
0
def time_spreads_quarterly(contracts, m1, m2):
    """
    Given a dataframe of daily values for monthly contracts (eg Brent Jan 15, Brent Feb 15, Brent Mar 15)
    with columns headings as '2020-01-01', '2020-02-01'
    Return a dataframe of time spreads  (eg m1 = Q1, m2 = Q2 gives Q1-Q2 spread)
    """

    qtrcontracts = quarterly_contracts(contracts)
    qtrcontracts_years = dates.find_year(qtrcontracts)
    cf = [x for x in qtrcontracts if x.startswith(m1)]
    dfs = []

    for c1 in cf:
        year1, year2 = qtrcontracts_years[c1], qtrcontracts_years[c1]
        if int(m1[-1]) >= int(
                m2[-1]):  # eg Q1-Q1 or Q4-Q1, then do Q419 - Q120 (year ahead)
            year2 = year1 + 1
        c2 = [
            x for x in qtrcontracts
            if x.startswith(m2) and qtrcontracts_years[x] == year2
        ]
        if len(c2) == 1:
            c2 = c2[0]
            s = qtrcontracts[c1] - qtrcontracts[c2]
            s.name = year1
            dfs.append(s)

    res = pd.concat(dfs, 1)
    res = res.dropna(how='all', axis='rows')
    return res
Ejemplo n.º 5
0
def std_yr_col(df, asdict=False):
    """
    Given a dataframe with yearly columns, determine the line colour to use
    """

    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)

    yearmap = dates.find_year(df, use_delta=True)
    colmap = {}
    for colname, delta in yearmap.items():
        colmap[colname] = year_col_map.get(delta, default_line_col)

    if asdict:
        return colmap

    # return array of colours to use - this can be passed into cufflift iplot method
    return [colmap[x] for x in df]
Ejemplo n.º 6
0
 def test_find_year(self):
     df = pd.DataFrame(columns=['Q1 2020', 'Q2 2022'])
     res = dates.find_year(df)
     self.assertEqual(res['Q1 2020'], 2020)
     self.assertEqual(res['Q2 2022'], 2022)
Ejemplo n.º 7
0
 def test_find_year4(self):
     df = pd.DataFrame(columns=['FB', 'FP 2021'])
     res = dates.find_year(df)
     self.assertEqual(res['FB'], 'FB')
     self.assertEqual(res['FP 2021'], 2021)
Ejemplo n.º 8
0
 def test_find_year2(self):
     df = pd.DataFrame(columns=['CAL 2020-2021'])
     res = dates.find_year(df)
     self.assertEqual(res['CAL 2020-2021'], 2020)
Ejemplo n.º 9
0
def spread_combination(contracts, combination_type):
    """
    Convenience method to access functionality in forwards using a combination_type keyword
    :param contracts:
    :param combination_type:
    :return:
    """
    combination_type = combination_type.lower()
    contracts = contracts.dropna(how='all', axis='rows')

    if combination_type == "calendar":
        c_contracts = cal_contracts(contracts)
        colmap = dates.find_year(c_contracts)
        c_contracts = c_contracts.rename(
            columns={x: colmap[x]
                     for x in c_contracts.columns})
        return c_contracts
    if combination_type == "calendar spread":
        c_contracts = cal_spreads(cal_contracts(contracts))
        colmap = dates.find_year(c_contracts)
        c_contracts = c_contracts.rename(
            columns={x: colmap[x]
                     for x in c_contracts.columns})
        return c_contracts
    if combination_type.startswith('q'):
        q_contracts = quarterly_contracts(contracts)
        m = re.search('q\d-q\d', combination_type)
        if m:
            q_spreads = quarterly_spreads(q_contracts)
            q_spreads = q_spreads[[
                x for x in q_spreads.columns
                if x.startswith(combination_type.upper())
            ]]
            colmap = dates.find_year(q_spreads)
            q_spreads = q_spreads.rename(
                columns={x: colmap[x]
                         for x in q_spreads.columns})
            return q_spreads
        m = re.search('q\dq\dq\d', combination_type)
        if m:
            q_spreads = fly_quarterly(q_contracts,
                                      x=int(combination_type[1]),
                                      y=int(combination_type[3]),
                                      z=int(combination_type[5]))
            colmap = dates.find_year(q_spreads)
            q_spreads = q_spreads.rename(
                columns={x: colmap[x]
                         for x in q_spreads.columns})
            return q_spreads
        m = re.search('q\d', combination_type)
        if m:
            q_contracts = q_contracts[[
                x for x in q_contracts.columns
                if x.startswith(combination_type.upper())
            ]]
            colmap = dates.find_year(q_contracts)
            q_contracts = q_contracts.rename(
                columns={x: colmap[x]
                         for x in q_contracts.columns})
            return q_contracts

    # handle monthly, spread and fly inputs
    month_abbr_inv = {
        month.lower(): index
        for index, month in enumerate(month_abbr) if month
    }
    months = [x.lower() for x in month_abbr]
    if len(combination_type) == 3 and combination_type in months:
        c = contracts[[
            x for x in contracts if x.month == month_abbr_inv[combination_type]
        ]]
        c = c.rename(columns={x: x.year for x in c.columns})
        return c
    if len(combination_type) == 6:
        m1, m2 = combination_type[0:3], combination_type[3:6]
        if m1 in months and m2 in months:
            c = time_spreads(contracts, month_abbr_inv[m1], month_abbr_inv[m2])
            return c
    if len(combination_type) == 9:
        m1, m2, m3 = combination_type[0:3], combination_type[
            3:6], combination_type[6:9]
        if m1 in months and m2 in months and m3 in months:
            c = fly(contracts, month_abbr_inv[m1], month_abbr_inv[m2],
                    month_abbr_inv[m3])
            return c