Example #1
def get_detailed_data(con=None, data=None, _from=None, to=None, days_span=10):
    """
    Retrieve minute candles for every instrument in `data` and merge them
    with the instrument metadata.

    Parameters
    ----------
    con : connector
        TinkoffAPI connector. Create one using the connect() function.
    data : pandas.DataFrame
        DataFrame that contains tickers and FIGIs of instruments.
        Get it using get_instruments().
    _from : datetime, optional
        Date (beginning of the period for the query). The default is None.
    to : datetime, optional
        Date (end of the period for the query). The default is None.
    days_span : int, optional
        How many days back from `to` to query when `_from` is not set.
        The default is 10.

    Returns
    -------
    df_merge : pandas.DataFrame
        Candle data for all FIGIs in `data`, merged with the instrument
        metadata.

    Notes
    -----
    The candle interval is hard-coded to "1min"; an `interval`
    parameter is not implemented yet (@todo).
    """
    start = time.time()
    logger = logging.getLogger("TinkoffAPI::" + get_detailed_data.__name__)

    logger.info(f"retrieving available data is in progress ...")
    if to == None:
        to = datetime.now()
        endTime = to
    else:
        endTime = to
    if _from == None:
        _from = endTime - timedelta(days=days_span)

    list_df = []
    for figi in tqdm(data.figi.unique()):
        logger.info("retrieving figi: " + figi + " ticker: " +
                    data[data.figi == figi].ticker.values)

        df = detailed_history(con=con,
                              figi=figi,
                              _from=_from,
                              to=to,
                              interval="1min",
                              days_span=days_span)
        time.sleep(2)
        list_df.append(df)

    data.reset_index(inplace=True, drop=True)
    df_merge = pd.concat(list_df)
    df_merge = pd.merge(left=df_merge, right=data, on="figi")
    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)
    logger.info(f"retrieved dataframe with shape {df_merge.shape} ")
    return df_merge
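
A minimal usage sketch chaining get_instruments() and get_detailed_data(), as the docstring suggests; the connector creation is a placeholder for the connect() function mentioned above:

from datetime import datetime, timedelta

# Hypothetical usage: the last 10 days of 1min candles for all stocks.
con = connect()  # TinkoffAPI connector (placeholder)
df_stocks = get_instruments(con=con, instrument="Stock")
df_candles = get_detailed_data(con=con,
                               data=df_stocks,
                               _from=datetime.now() - timedelta(days=10),
                               to=datetime.now(),
                               days_span=10)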
Example #2
def get_instruments(con=None, instrument=None):
    """
    Gets the list of instruments (bonds, stocks, ETFs) traded via the tinkoff-invest broker.

    Args:
        con (connector) : TinkoffAPI connector. Create one using the connect() function.
        instrument (str) : one of "Stock", "Bond", "Etf".

    Returns:
        df (Pandas dataframe) : list of instruments available for trade,
            or None if no instrument or an unknown instrument name is provided.

    Raises:
        Nothing. Logs an error and returns None on a wrong instrument name.
    """
    start = time.time()
    logger = logging.getLogger(apiname + get_instruments.__name__)

    logger.info(f"retrieving available {instrument} is in progress ...")
    if instrument == "Stock":
        tickers = con.market.market_stocks_get_with_http_info()
    elif instrument == "Bond":
        tickers = con.market.market_bonds_get_with_http_info()
    elif instrument == "Etf":
        tickers = con.market.market_etfs_get_with_http_info()
    else:
        logger.error(
            f"Wrong instrument defined. Acceptable are Stock, Bond, Etf. You provided {instrument}"
        )
        return None
    list_of_tickers = [item.to_dict() for item in tickers[0].payload.instruments]
    df = pd.DataFrame.from_records(list_of_tickers)
    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)
    logger.info(f"retrieved dataframe with shape {df.shape} ")
    return df
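
For reference, the *_get_with_http_info methods of OpenAPI-generated clients typically return a (data, status_code, headers) tuple, which is why the code above indexes tickers[0]. A minimal sketch under that assumption:

# Hypothetical sketch: unpack the with_http_info return value explicitly.
response, status_code, headers = con.market.market_stocks_get_with_http_info()
print(status_code, response.payload.instruments[0].to_dict())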
Example #3
def get_SQL_table(connection, table_name):
    """Get data from a given table stored on the SQL server.
    Warning: can take an extreme amount of memory if you query the whole minutes table!

    Args:
        connection : ClickHouse connector. Client object of the clickhouse_driver.client module.
        table_name (string) : table name in the ClickHouse database.
            The query will crash if the table does not exist.

    Returns:
        df (Pandas.DataFrame) : table with data
    """

    start = time.time()
    logger = logging.getLogger(apiname + get_SQL_table.__name__)
    logger.info(f"read table {table_name} from Clickhouse")

    result, columns = connection.execute(f"SELECT * FROM {table_name}",
                                         with_column_types=True)
    df = pd.DataFrame(result, columns=[col[0] for col in columns])

    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)

    return df
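
A minimal usage sketch (host and table name are placeholders; credentials handling is omitted):

from clickhouse_driver import Client

# Hypothetical usage: read a table from a local ClickHouse instance.
client = Client(host="localhost")
df = get_SQL_table(client, "minutes")
print(df.shape)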
Example #4
    df_data_stock = tapi.get_detailed_data(con=con,
                                           data=df_stock,
                                           _from=startTime,
                                           to=endTime,
                                           days_span=10)
    list_of_securities.append(df_data_stock)
except Exception as error:
    logger.error("scrip failed during downloading Stocks data")
    logger.error(f"exception catched: {error}")

df = pd.concat(list_of_securities)
logger.info("Creating connection to clickhouse")
con, ping = chh.connect(server_adress)

logger.info("Uploading data to clickhouse")
try:
    chh.append_df_to_SQL_table(
        df=df,
        table_name=table_name,
        server_ip=server_adress,
        is_tmp_table_to_delete=True,
    )
except Exception as error:
    logger.error("scrip failed during pushing data to clickhouse")
    logger.error(f"exception catched: {error}")

chh.close_connection(con)

timer_string = utilities_timers.format_timer_string(time.time() - start)
logger.info(f"Script runs for {timer_string}")
Example #5
def append_df_to_SQL_table(
    df=None,
    table_name="minutes",
    server_ip="localhost",
    is_tmp_table_to_delete=True,
):
    """write dataframe to SQL server. Only values with unique columns will be append.

    Args:
        df (Pandas.DataFrame) : data to be written to SQL
        table_name (string) : table name in Clickhouse database."minutes" by default.
        server_ip (string) : Server IP. 'localhost' by default
        is_tmp_table_to_delete (bool) :
            True (default) to delete temporary table,
            which was used to keep new data when merging with the existing table.
            Set False if you want to keep tmp table for any reason (e.g. merge this table to multiple tables).

    Returns:
        nothing

    Raises:
        nothing

    """
    start = time.time()

    logger = logging.getLogger(apiname + append_df_to_SQL_table.__name__)

    ##---------------------------------------------------------------------------------------------
    n_rows, n_cols = df.shape

    logger.info(f"write (row x col) : ({n_rows} x {n_cols})")
    expected_min = 1 * n_rows * n_cols / 130000 / 34  # experimental formula for ru0138
    logger.info(f"expected time ~{int(expected_min)} minutes")

    if n_rows == 0:
        logger.info(
            "DataFrame has 0 rows. Target table will not be modified. Exit.")
        return

    logger.info(f"list of columns in df: {df.columns}.")

    if table_name[0].isdigit():
        table_name = "_" + table_name
        logger.warning(
            f"table name starts with a digit. Renamed to {table_name}.")

    con, ping = connect(server_ip)
    logger.info(f"data uploading to a temp table  ...")
    # dropping table tmp
    con.execute("DROP TABLE IF EXISTS tmp")
    # creating new table
    con.execute("CREATE TABLE tmp ("
                "figi String, "
                "interval String, o Float64, "
                "c Float64, h Float64, "
                "l Float64, v Int64, "
                "time DateTime, ticker String, "
                "isin String, min_price_increment Float64,"
                "lot Int64, currency String,"
                "name String, type String) ENGINE = Log ")
    con.execute("INSERT INTO tmp VALUES", [tuple(x) for x in df.values])

    inserted_rows_count = con.execute("SELECT count(*) FROM tmp")[0][0]
    logger.info(
        f"Inserted {inserted_rows_count} rows to temporary table. Moving to '{table_name}' table"
    )
    initial_rows_count = con.execute(f"SELECT count(*) FROM {table_name}")[0][0]
    logger.info(f"initially the {table_name} table has {initial_rows_count} rows")
    # Merge tmp into the target table, skipping rows whose (ticker, time)
    # pair already exists there, so only unique rows are appended.
    con.execute(
        f"INSERT INTO {table_name} "
        "SELECT DISTINCT "
        "toDate(time) AS day, "
        "figi, "
        "interval, o, "
        "c, h, "
        "l, v, "
        "time, ticker, "
        "isin, min_price_increment, "
        "lot, currency, "
        "name, type FROM tmp "
        f"WHERE (ticker, time) NOT IN (SELECT (ticker, time) FROM {table_name})"
    )

    logger.info(f"merge complete")
    new_rows_count = con.execute("SELECT count(*) FROM minutes")[0][0]
    inserted_row_count = new_rows_count - initial_rows_count
    logger.info(f"Inserted {inserted_row_count} unique rows to minutes table.")
    logger.info(f"now minutes table has {new_rows_count} rows")
    if is_tmp_table_to_delete:
        con.execute("DROP TABLE IF EXISTS tmp")

    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)

    logger.info("new data written to table 'minutes'")
Example #6
def query_data_by_time(
    channels_list,
    startTime=None,
    endTime=None,
    days_span=90,
    server_ip="localhost",
    table_name="minutes",
    instrument_type="Etf",
    data_freq="min",
):
    """
    Request data from SQL DataBase in time range [startTime, endTime].

    Args:
        channels_list (list) : list of channels to be quired from DataBase
        startTime (datetime) : data will be quired AFTER this time moment
            If not set: 1 year period from endTime.
        endTime (datetime) : data will be quired BEFORE this time moment
            If not set: datetime.now() is used.
        days_span (int) : number of days before the endTime, if StartTime is not set.
        server_ip : string
        ip adress of Clickhouse instance(i.e. 192.168.1.128). localhost by default.
        Username and password taken from .env file
        table_name (string) : table name in SQL database.minutes by default

    Returns
        (pd.DataFrame) : Table with requested channels in a given time range.
    """
    start = time.time()
    logger = logging.getLogger(apiname + query_data_by_time.__name__)
    logger.info("data query in progress ...")

    if channels_list is None or len(channels_list) == 0:
        channels_list = ["*"]
    if endTime is None:
        endTime = datetime.now()
    if startTime is None:
        startTime = endTime - timedelta(days=days_span)
    # Check-up on proper instrument type
    if instrument_type not in ["Etf", "Bond", "Stock"]:
        logger.error(
            "Unsupported instrument type. Only Etf, Bond, Stock are supported")
        logger.error("Nothing will be queried")
        return None
    if data_freq not in ["min", "day", "week"]:
        logger.error(
            "Unsupported data frequency. Only min, day, week are supported")
        logger.error("Nothing will be queried")
        return None

    con, _ = connect(server_ip)
    channel_string = (" ,").join(channels_list)
    # converting values to strings to get clickhouse-compatible time format
    startTime = startTime.strftime("%Y-%m-%d %H:%M:%S")
    endTime = endTime.strftime("%Y-%m-%d %H:%M:%S")
    if data_freq == "min":

        msg1 = f"select * from {table_name} "
        msg2 = f"where time BETWEEN '{startTime}' AND '{endTime}' "
        msg3 = f"AND type='{instrument_type}' AND name={channel_string}"
        query = msg1 + msg2 + msg3
    elif data_freq == "day":

        logger.info(f"startTime is {startTime}")
        logger.info(f"endTIme is {endTime}")
        msg1 = f"select uniq(time), count(), ticker, type,currency,name, day, argMin(o, time) as o,max(h) as h, min(l) as l, argMax(c, time) as c, sum(v) as v "
        msg2 = f"from minutes "

        msg3 = f"where time BETWEEN '{startTime}' AND '{endTime}' AND type='{instrument_type}' "
        msg4 = "GROUP BY day, ticker, type, currency, name ORDER BY day desc"
        query = msg1 + msg2 + msg3 + msg4
    elif data_freq == "week":

        msg1 = "SELECT uniq(time), count(),ticker,currency, name, toMonday(day) as monday, argMin(o, time) as o, max(h) as h, min(l) as l, argMax(c, time) as c, sum(v) as v "
        msg2 = f"from minutes "
        msg3 = f"where time BETWEEN '{startTime}' AND '{endTime}' AND type='{instrument_type}' "
        msg4 = "GROUP BY monday, ticker, type, currency, name ORDER BY monday desc"
        query = msg1 + msg2 + msg3 + msg4

    logger.info(f"query string: {query}")

    result, columns = con.execute(query, with_column_types=True)
    df = pd.DataFrame(result, columns=[col[0] for col in columns])

    close_connection(con)

    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)
    logger.info(f"data query complete. Dataframe has shape : {df.shape}.")
    logger.info("--------------------------")

    return df
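
A minimal usage sketch (assuming a reachable ClickHouse instance with a populated minutes table):

from datetime import datetime

# Hypothetical usage: daily OHLCV aggregates for all ETFs over the last 30 days.
df_daily = query_data_by_time(channels_list=["*"],
                              endTime=datetime.now(),
                              days_span=30,
                              server_ip="localhost",
                              table_name="minutes",
                              instrument_type="Etf",
                              data_freq="day")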
Example #7
def detailed_history(con=None,
                     figi="BBG00M0C8YM7",
                     _from=None,
                     to=None,
                     interval="1min",
                     days_span=10):
    """
    

    Parameters
    ----------
    con(connector) : TinkoffAPI connector. Create one using connect() function
    figi : str, required
        FIGI identifier of instrument. The default is 'BBG00M0C8YM7'.
    _from : datetime, optional
        Date (beginning of the period for query). The default is None.
    to : datetime, optional
        Date (end of the period for query). The default is None.
    interval : str, required
        frequency of data to retrieve. The default is "1min".
    days_span : int, required
        How much day back to query from now. The default is 10 days.

    Returns
    -------
    df_merge : Pandas dataframe
        Contains candles data for selected FIGI for given time period.

    """
    start = time.time()
    logger = logging.getLogger(apiname + detailed_history.__name__)

    logger.info(f"retrieving available data is in progress ...")
    if to == None:
        to = datetime.now()
        endTime = to
    else:
        endTime = to
    if _from == None:
        _from = endTime - timedelta(days=days_span)
        startTime = _from
    else:
        startTime = _from

    startTime_list = [startTime.strftime(timeformat) + gmt_time]
    endTime_list = [endTime.strftime(timeformat) + gmt_time]

    # Split periods longer than one day into one-day chunks (newest first),
    # since 1min candles are requested one day at a time.
    if (endTime - startTime).days > 1:
        startTime_list = []
        endTime_list = []
        endTime = to
        for _ in range((to - _from).days):
            newStartTime = endTime - timedelta(days=1)
            startTime_list.append(newStartTime.strftime(timeformat) + gmt_time)
            endTime_list.append(endTime.strftime(timeformat) + gmt_time)
            endTime = newStartTime

    list_df = []
    for chunk_start, chunk_end in zip(startTime_list, endTime_list):
        tickers = con.market.market_candles_get_with_http_info(
            figi=figi,
            _from=chunk_start,
            to=chunk_end,
            interval=interval)
        candles = [item.to_dict() for item in tickers[0].payload.candles]
        list_df.append(pd.DataFrame.from_records(candles))

    df_merge = pd.concat(list_df)
    timer_string = utilities_timers.format_timer_string(time.time() - start)
    logger.info(timer_string)
    logger.info(f"Shape of dataframe {df_merge.shape} ...")
    return df_merge
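
A minimal usage sketch (the connector creation is a placeholder; timeformat and gmt_time are assumed to be module-level constants):

from datetime import datetime

# Hypothetical usage: the last 3 days of 1min candles for a single FIGI.
con = connect()  # TinkoffAPI connector (placeholder)
df = detailed_history(con=con,
                      figi="BBG00M0C8YM7",
                      to=datetime.now(),
                      interval="1min",
                      days_span=3)
print(df.shape)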