예제 #1
0
def _get_month_day_diffs(df_symbols):
    """Build per-month open-price ratios relative to each month's first day.

    For every (year, month) combination present in the prepared history,
    ``Column.PERCENT`` is set to the row's ``Column.OPEN`` divided by the
    ``Column.OPEN`` of the row with the smallest ``Column.DAY`` in that month.
    Months with fewer than 18 rows are skipped and reported at debug level.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to the YEAR, MONTH, DAY, SYMBOL and PERCENT
        columns. If ``update_dataframe`` returns an empty frame, that frame is
        returned unchanged (its columns are whatever ``update_dataframe``
        produced).
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df = update_dataframe(df_symbols[Column.HISTORY], symbol)

    if df.empty:
        return df

    output_columns = [
        Column.YEAR,
        Column.MONTH,
        Column.DAY,
        Column.SYMBOL,
        Column.PERCENT,
    ]
    # 28 days in shortest Feb, 10 days - weekends max
    min_rows = 28 - 10

    frames = []
    for year in df[Column.YEAR].unique():
        for month in df[Column.MONTH].unique():
            df_month = df[
                (df[Column.YEAR] == year) & (df[Column.MONTH] == month)
            ].copy()
            if df_month.empty:
                # This (year, month) combination does not occur in the data.
                continue
            if df_month.shape[0] < min_rows:
                logger.debug(f"Not enough data for {symbol} in {year}.{month}")
                continue

            first_day = df_month[Column.DAY].min()
            base_open = df_month[df_month[Column.DAY] == first_day].iloc[0][
                Column.OPEN
            ]
            df_month[Column.PERCENT] = df_month[Column.OPEN] / base_open
            frames.append(df_month)

    if not frames:
        # No month had enough rows: return an empty frame with the same columns.
        return DataFrame(columns=output_columns)

    return concat(frames)[output_columns]
예제 #2
0
def _get_year_day_diffs(df_symbols):
    """Build per-year open-price ratios relative to each year's first date.

    For every year in the prepared history that has at least 150 rows,
    ``Column.PERCENT`` is set to the row's ``Column.OPEN`` divided by the
    ``Column.OPEN`` of the row with the earliest value in the date column.
    Years with fewer than 150 rows are skipped and reported at debug level.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to the date column (as named by
        ``get_date_column_name``), YEAR, MONTH, SYMBOL and PERCENT. Empty, with
        those columns, when no year qualifies.
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df = update_dataframe(df_symbols[Column.HISTORY], symbol)
    date_column_name = get_date_column_name(df)

    output_columns = [
        date_column_name,
        Column.YEAR,
        Column.MONTH,
        Column.SYMBOL,
        Column.PERCENT,
    ]
    min_rows = 150

    frames = []
    for year in df[Column.YEAR].unique():
        df_year = df[df[Column.YEAR] == year].copy()
        # This guard is the single size check. The former follow-up assert
        # required strictly more than 150 rows and therefore crashed on a year
        # with exactly 150 rows that had just passed this guard.
        if df_year.shape[0] < min_rows:
            logger.debug(f"Not enough data for {symbol} in {year}")
            continue

        first = df_year[date_column_name].min()
        base_open = df_year[df_year[date_column_name] == first].iloc[0][Column.OPEN]
        df_year[Column.PERCENT] = df_year[Column.OPEN] / base_open
        frames.append(df_year)

    if not frames:
        # Build the empty result explicitly so PERCENT is always present, even
        # if the prepared frame itself does not carry that column.
        return DataFrame(columns=output_columns)

    return concat(frames)[output_columns]
예제 #3
0
def _get_week_diffs(df_symbols):
    """Return the weekly view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). No further computation happens here.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        The prepared frame restricted to YEAR, WEEK, SYMBOL and PERCENT.
    """
    ticker = df_symbols[Column.SYMBOL]
    prepared = update_dataframe(df_symbols[Column.HISTORY], ticker, True)

    wanted_columns = [Column.YEAR, Column.WEEK, Column.SYMBOL, Column.PERCENT]
    return prepared[wanted_columns]
예제 #4
0
def _get_year_day_diffs(df_symbols):
    """Return the per-date view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). The date column is looked up via ``get_date_column_name``.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        The prepared frame restricted to the date column, YEAR, MONTH, SYMBOL
        and PERCENT.
    """
    ticker = df_symbols[Column.SYMBOL]
    prepared = update_dataframe(df_symbols[Column.HISTORY], ticker, True)
    date_col = get_date_column_name(prepared)

    wanted_columns = [
        date_col,
        Column.YEAR,
        Column.MONTH,
        Column.SYMBOL,
        Column.PERCENT,
    ]
    return prepared[wanted_columns]
예제 #5
0
def _get_quarter_diffs(df_symbols):
    """Build per-hour open-price ratios sampled on quarter-hour minutes.

    For every (year, week, day, hour) combination present in the prepared
    history, ``Column.PERCENT`` is set to the row's ``Column.OPEN`` divided by
    the ``Column.OPEN`` of the row with the smallest ``Column.MINUTE`` in that
    hour. Hours with fewer than 2 rows are skipped and reported at debug
    level. Only rows whose minute is 0, 15, 30 or 45 are kept, and
    ``Column.QUARTER`` is filled with a copy of ``Column.MINUTE``.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to YEAR, WEEK, DAY, HOUR, MINUTE, QUARTER, SYMBOL
        and PERCENT. Empty, with those columns, when no hour qualifies.

    Raises:
        AssertionError: If the prepared history has 3 or fewer distinct
            minute values.
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df_history = df_symbols[Column.HISTORY]

    df = update_dataframe(df_history, symbol)

    minutes = df[Column.MINUTE].unique()
    assert minutes.shape[0] > 3, f"Wrong data for {symbol} {minutes}"

    output_columns = [
        Column.YEAR,
        Column.WEEK,
        Column.DAY,
        Column.HOUR,
        Column.MINUTE,
        Column.QUARTER,
        Column.SYMBOL,
        Column.PERCENT,
    ]

    frames = []
    for year in df[Column.YEAR].unique():
        for week in df[Column.WEEK].unique():
            for day in df[Column.DAY].unique():
                for hour in df[Column.HOUR].unique():
                    df_hour = df[
                        (df[Column.YEAR] == year)
                        & (df[Column.WEEK] == week)
                        & (df[Column.DAY] == day)
                        & (df[Column.HOUR] == hour)
                    ].copy()
                    if df_hour.empty:
                        # This combination does not occur in the data.
                        continue
                    # good data is at least 2 times per hour (9:30, 9:45)
                    if df_hour.shape[0] < 2:
                        logger.debug(f"Not enough data for {symbol} in {week} {day}")
                        continue

                    first_time = df_hour[Column.MINUTE].min()
                    base_open = df_hour[df_hour[Column.MINUTE] == first_time].iloc[0][
                        Column.OPEN
                    ]
                    df_hour[Column.PERCENT] = df_hour[Column.OPEN] / base_open
                    frames.append(df_hour)

    if not frames:
        return DataFrame(columns=output_columns)

    df_days = concat(frames)
    # Explicit copy so the QUARTER assignment below writes to an independent
    # frame rather than to a slice (avoids SettingWithCopyWarning).
    df_days = df_days[df_days[Column.MINUTE].isin(range(0, 60, 15))].copy()
    df_days[Column.QUARTER] = df_days[Column.MINUTE]
    return df_days[output_columns]
예제 #6
0
def _get_hour_diffs(df_symbols):
    """Return the hourly view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). No further computation happens here.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        The prepared frame restricted to YEAR, WEEK, DAY, HOUR, SYMBOL and
        PERCENT.
    """
    ticker = df_symbols[Column.SYMBOL]
    raw_history = df_symbols[Column.HISTORY]
    prepared = update_dataframe(raw_history, ticker, True)

    wanted_columns = [
        Column.YEAR,
        Column.WEEK,
        Column.DAY,
        Column.HOUR,
        Column.SYMBOL,
        Column.PERCENT,
    ]
    return prepared[wanted_columns]
예제 #7
0
def _get_quarter_diffs(df_symbols):
    """Return the quarter-hour view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). Only rows whose ``Column.MINUTE`` is 0, 15, 30 or 45 are
    kept, and ``Column.QUARTER`` is filled with a copy of ``Column.MINUTE``.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        DataFrame restricted to YEAR, WEEK, DAY, HOUR, MINUTE, QUARTER, SYMBOL
        and PERCENT.
    """
    symbol = df_symbols[Column.SYMBOL]
    df_history = df_symbols[Column.HISTORY]

    df = update_dataframe(df_history, symbol, True)

    # Explicit copy so the QUARTER assignment below writes to an independent
    # frame rather than to a slice of the prepared frame (avoids
    # SettingWithCopyWarning).
    df = df[df[Column.MINUTE].isin(range(0, 60, 15))].copy()
    df[Column.QUARTER] = df[Column.MINUTE]
    return df[
        [
            Column.YEAR,
            Column.WEEK,
            Column.DAY,
            Column.HOUR,
            Column.MINUTE,
            Column.QUARTER,
            Column.SYMBOL,
            Column.PERCENT,
        ]
    ]
예제 #8
0
def _get_time_diffs(df_symbols):
    """Return the half-hour view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). Only rows whose ``Column.MINUTE`` is 0 or 30 are kept, and
    ``Column.TIME`` is set to ``HOUR + MINUTE / 60`` (fractional hours).

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        DataFrame restricted to YEAR, WEEK, DAY, HOUR, MINUTE, TIME, SYMBOL
        and PERCENT.
    """
    symbol = df_symbols[Column.SYMBOL]
    df_history = df_symbols[Column.HISTORY]

    df = update_dataframe(df_history, symbol, True)

    # Explicit copy so the TIME assignment below writes to an independent
    # frame rather than to a slice of the prepared frame (avoids
    # SettingWithCopyWarning).
    df = df[df[Column.MINUTE].isin([0, 30])].copy()
    # Column arithmetic instead of a row-wise apply: same values, no Python
    # call per row, and well defined when the filter leaves no rows.
    df[Column.TIME] = df[Column.HOUR] + df[Column.MINUTE] / 60

    return df[
        [
            Column.YEAR,
            Column.WEEK,
            Column.DAY,
            Column.HOUR,
            Column.MINUTE,
            Column.TIME,
            Column.SYMBOL,
            Column.PERCENT,
        ]
    ]
예제 #9
0
def _get_hour_diffs(df_symbols):
    """Build per-day open-price ratios relative to each day's first hour.

    For every (year, week, day) combination present in the prepared history,
    ``Column.PERCENT`` is set to the row's ``Column.OPEN`` divided by the
    ``Column.OPEN`` of the row with the smallest ``Column.HOUR`` on that day.
    Days with fewer than 5 rows are skipped and reported at debug level.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to YEAR, WEEK, DAY, HOUR, SYMBOL and PERCENT.
        Empty, with those columns, when no day qualifies.

    Raises:
        AssertionError: If the prepared history has 5 or fewer distinct hour
            values.
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df_history = df_symbols[Column.HISTORY]

    df = update_dataframe(df_history, symbol)

    hours = df[Column.HOUR].unique()
    assert hours.shape[0] > 5, f"Wrong data for {symbol} {hours}"

    output_columns = [
        Column.YEAR,
        Column.WEEK,
        Column.DAY,
        Column.HOUR,
        Column.SYMBOL,
        Column.PERCENT,
    ]

    frames = []
    for year in df[Column.YEAR].unique():
        for week in df[Column.WEEK].unique():
            for day in df[Column.DAY].unique():
                df_day = df[
                    (df[Column.YEAR] == year)
                    & (df[Column.WEEK] == week)
                    & (df[Column.DAY] == day)
                ].copy()
                if df_day.empty:
                    # This combination does not occur in the data.
                    continue
                # good data is at least 5 hours per day
                if df_day.shape[0] < 5:
                    logger.debug(f"Not enough data for {symbol} in {week} {day}")
                    continue

                first_hour = df_day[Column.HOUR].min()
                base_open = df_day[df_day[Column.HOUR] == first_hour].iloc[0][
                    Column.OPEN
                ]
                df_day[Column.PERCENT] = df_day[Column.OPEN] / base_open
                frames.append(df_day)

    if not frames:
        return DataFrame(columns=output_columns)

    return concat(frames)[output_columns]
예제 #10
0
def _get_monthly_diffs(df_symbols):
    """Build per-year open-price ratios relative to each year's first month.

    For every year in the prepared history with at least 12 rows,
    ``Column.PERCENT`` is set to the row's ``Column.OPEN`` divided by the
    ``Column.OPEN`` of the row with the smallest ``Column.MONTH`` in that
    year. Years with fewer than 12 rows are skipped and reported at debug
    level.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to YEAR, MONTH, SYMBOL and PERCENT. Empty, with
        those columns, when no year qualifies.

    Raises:
        AssertionError: If a year has more than 12 rows.
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df = update_dataframe(df_symbols[Column.HISTORY], symbol)

    output_columns = [Column.YEAR, Column.MONTH, Column.SYMBOL, Column.PERCENT]

    frames = []
    for year in df[Column.YEAR].unique():
        df_month = df[df[Column.YEAR] == year].copy()
        if df_month.shape[0] < 12:
            logger.debug(f"Not enough data for {symbol} in {year}")
            continue

        first_month = df_month[Column.MONTH].min()
        base_open = df_month[df_month[Column.MONTH] == first_month].iloc[0][
            Column.OPEN
        ]
        df_month[Column.PERCENT] = df_month[Column.OPEN] / base_open
        # Fewer than 12 rows was handled above; this still rejects years that
        # carry more than 12 rows.
        assert (
            df_month.shape[0] == 12
        ), f"Wrong number of month in dataframe {df_month.shape} for year {year}"

        frames.append(df_month)

    if not frames:
        return DataFrame(columns=output_columns)

    return concat(frames)[output_columns]
예제 #11
0
def _get_best_weekday_diffs(df_symbols):
    """Build per-week open-price ratios relative to each week's first weekday.

    For every (year, week) combination present in the prepared history with at
    least 3 rows, ``Column.PERCENT`` is set to the row's ``Column.OPEN``
    divided by the ``Column.OPEN`` of the row with the smallest
    ``Column.WEEKDAY`` in that week. Shorter weeks are skipped; the skip is
    logged at debug level unless the week number is 1, 52 or 53.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and
            ``Column.HISTORY``; the history is handed to ``update_dataframe``.

    Returns:
        DataFrame restricted to YEAR, WEEK, WEEKDAY, SYMBOL and PERCENT.
        Empty, with those columns, when no week qualifies.
    """
    # Local import: DataFrame.append was removed in pandas 2.0; frames are
    # collected in a list and concatenated once instead.
    from pandas import concat

    symbol = df_symbols[Column.SYMBOL]
    df = update_dataframe(df_symbols[Column.HISTORY], symbol)

    # if number of working days less than 3 - don't count
    number_of_good_working = 3

    output_columns = [
        Column.YEAR,
        Column.WEEK,
        Column.WEEKDAY,
        Column.SYMBOL,
        Column.PERCENT,
    ]

    frames = []
    for year in df[Column.YEAR].unique():
        for week in df[Column.WEEK].unique():
            df_week = df[(df[Column.YEAR] == year) & (df[Column.WEEK] == week)].copy()
            if df_week.empty:
                # This (year, week) combination does not occur in the data.
                continue

            # The row count cannot change after this guard, so no further
            # size check is needed below.
            if df_week.shape[0] < number_of_good_working:
                # first and last week of year might contain only 1-2 days
                if week not in (1, 52, 53):
                    days = df_week[Column.WEEKDAY].values
                    logger.debug(
                        f"Not enough data for {symbol} in {year} week {week}: {days}"
                    )
                continue

            first_weekday = df_week[Column.WEEKDAY].min()
            base_open = df_week[df_week[Column.WEEKDAY] == first_weekday].iloc[0][
                Column.OPEN
            ]
            df_week[Column.PERCENT] = df_week[Column.OPEN] / base_open
            frames.append(df_week)

    if not frames:
        return DataFrame(columns=output_columns)

    return concat(frames)[output_columns]
예제 #12
0
def _get_month_day_diffs(df_symbols):
    """Return the day-of-month view of a symbol's prepared history.

    The history is passed to ``update_dataframe`` together with the symbol and
    a third positional argument ``True`` (its meaning is defined by
    ``update_dataframe``; presumably it requests a precomputed PERCENT column,
    TODO confirm). No further computation happens here.

    Args:
        df_symbols: Record indexable by ``Column.SYMBOL`` and ``Column.HISTORY``.

    Returns:
        The prepared frame restricted to YEAR, MONTH, DAY, SYMBOL and PERCENT.
    """
    ticker = df_symbols[Column.SYMBOL]
    prepared = update_dataframe(df_symbols[Column.HISTORY], ticker, True)

    wanted_columns = [
        Column.YEAR,
        Column.MONTH,
        Column.DAY,
        Column.SYMBOL,
        Column.PERCENT,
    ]
    return prepared[wanted_columns]