def pulldata(ticker, interval, depth):
    """Download klines for ``ticker`` and save a cleaned copy as CSV.

    The rows returned by ``client.get_historical_klines`` are loaded into a
    DataFrame indexed by the first field (converted from epoch milliseconds),
    the price/volume columns are converted to numbers, and three derived
    columns are appended before the frame is written to
    ``files/<ticker>.csv`` and printed:

    * ``div``            -- ``Open / Close``
    * ``Log_VolumeGain`` -- ``log(Quote_Volume / previous Quote_Volume) * 100``
    * ``pricegain``      -- percentage change of ``Open`` versus the
      previous row

    Args:
        ticker: Symbol passed to the client; also used as the CSV file name.
        interval: Candle interval, passed through to the client unchanged.
        depth: Passed through as the client's third positional argument
            (presumably the start string, e.g. "1 day ago UTC" -- TODO
            confirm against callers).

    Returns:
        None. When the client returns no rows nothing is written.
    """
    import os  # local import: only needed to create the output directory

    klines = client.get_historical_klines(ticker, interval, depth)
    print(klines)

    df = pd.DataFrame(klines)
    if df.empty:
        # Nothing to label or convert; an empty frame has no columns, so the
        # processing below would fail on it.
        return

    df.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'IGNORE',
        'Quote_Volume', 'Trades_Count', 'BUY_VOL', 'BUY_VOL_VAL', 'x',
    ]
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df = df.set_index('Date')
    df = df.drop(columns=['IGNORE', 'BUY_VOL', 'BUY_VOL_VAL', 'x'])

    # Values are converted explicitly because the arithmetic below needs
    # numeric dtypes.
    for column in ('Open', 'High', 'Low', 'Close', 'Trades_Count'):
        df[column] = pd.to_numeric(df[column])
    # Volumes are stored rounded to whole units.
    for column in ('Volume', 'Quote_Volume'):
        df[column] = pd.to_numeric(df[column]).round()

    df['div'] = df['Open'] / df['Close']
    # NOTE(review): a zero Quote_Volume in either row makes the ratio 0 or
    # inf, so the log is +/-inf; fillna(0) only replaces NaN (first row and
    # 0/0), not those infinities.
    volume_ratio = df['Quote_Volume'] / df['Quote_Volume'].shift(1)
    df['Log_VolumeGain'] = (np.log(volume_ratio) * 100).fillna(0)
    df['pricegain'] = (df['Open'].pct_change() * 100).fillna(0)

    # to_csv does not create missing directories.
    os.makedirs('files', exist_ok=True)
    df.to_csv(f'files/{ticker}.csv')
    print(df)
def pullData(ticker, interval, depth):
    """Fetch klines for ``ticker`` and derive price/volume change columns.

    The rows returned by ``client.get_historical_klines`` are turned into a
    DataFrame with the columns ``timestamp, open, high, low, close, volume,
    quoteVolume`` (the remaining fields are dropped). ``quoteVolume`` values
    below 100 are raised to 100, then these columns are appended:

    * ``pchange1h`` / ``pchange24h`` -- difference of ``close`` versus 1 and
      23 rows earlier; ``...pct`` is that difference as a percentage of the
      *current* row's ``close``, rounded to 2 decimals.
    * ``v1h`` / ``v4h`` / ``v24`` -- rolling sums of ``quoteVolume`` over 1,
      4 and 23 rows.
    * ``vchange1h`` / ``vchange4h`` / ``vchange24h`` -- difference of the
      matching rolling sum versus 1, 4 and 23 rows earlier; ``...pct`` is
      that difference as a percentage of the current row's ``quoteVolume``,
      rounded to 2 decimals.

    Rows without enough history get 0 in the derived columns.

    NOTE(review): the "24h" columns use a 23-row window/offset. With hourly
    candles that is 23 hours; it may be deliberate (a one-day download holds
    24 rows, so an offset of 24 would never produce a value) -- confirm
    before changing. The column names presumably assume ``interval`` is one
    hour.

    Args:
        ticker: Symbol passed to the client.
        interval: Candle interval, passed through to the client unchanged.
        depth: Passed through as the client's third positional argument
            (presumably the start string -- TODO confirm against callers).

    Returns:
        The processed DataFrame, or the empty DataFrame unchanged when the
        client returned no rows.
    """
    raw = pd.DataFrame(client.get_historical_klines(ticker, interval, depth))
    print(raw)
    if raw.empty:
        # An empty frame has no columns, so the 12-name assignment below
        # would raise; hand the empty frame back instead.
        return raw

    raw[0] = pd.to_datetime(raw[0], unit='ms')
    print(raw)
    raw.columns = [
        'timestamp', 'open', 'high', 'low', 'close', 'volume', 'IGNORE',
        'quoteVolume', 'SELLVolume', 'BUY_VOL', 'BUY_VOL_VAL', 'x',
    ]
    raw = raw.drop(
        columns=['IGNORE', 'BUY_VOL', 'BUY_VOL_VAL', 'x', 'SELLVolume'])

    # Convert to numbers; volumes are additionally rounded to whole units.
    for column in ('open', 'high', 'low', 'close'):
        raw[column] = pd.to_numeric(raw[column])
    for column in ('volume', 'quoteVolume'):
        raw[column] = pd.to_numeric(raw[column]).round()

    # Floor the quote volume, which is used as a divisor below.
    raw.loc[raw.quoteVolume < 100, 'quoteVolume'] = 100

    # Price change versus N rows back, absolute and as a percentage.
    for label, periods in (('1h', 1), ('24h', 23)):
        change = raw['close'].diff(periods).fillna(0)
        raw[f'pchange{label}'] = change
        raw[f'pchange{label}pct'] = (change / raw['close'] * 100).round(2)

    # Rolling quote-volume sums and their change over the same span.
    for total_column, label, span in (('v1h', '1h', 1),
                                      ('v4h', '4h', 4),
                                      ('v24', '24h', 23)):
        raw[total_column] = (
            raw['quoteVolume'].rolling(window=span).sum().fillna(0))
        change = raw[total_column].diff(span).fillna(0)
        raw[f'vchange{label}'] = change
        raw[f'vchange{label}pct'] = (
            change / raw['quoteVolume'] * 100).round(2)

    print(raw)
    return raw
def get_coin_data(get_flag="",
                  path="",
                  name="",
                  pair="",
                  start_date="",
                  end_date="",
                  convert_date=convert_date):
    """Collect hourly coin price data from the Binance API into a .csv file.

    Arguments:
        get_flag {str}: "collect" (overwrite the file and write a header) or
            "append" (add rows to an existing file). If left blank the value
            of ``config["data"]["coins_get"]`` from config.json is used.
        path {str}: Directory where the .csv will be saved. If left blank it
            defaults to ``<config data.base_path>/coins/``.
        name {str}: Coin name; spaces are replaced by "-" to build the file
            name ``<name>.csv``.
        pair {str}: Cryptocurrency pair like "BTCUSDT" = Bitcoin and Tether.
        start_date {str}: Date in the ISO 8601 format *with hour*
            YYYY-MM-DDTHH. Can be left blank: for "collect" the default is
            "2015-05-01T00"; for "append" the last ``open_time_iso`` value of
            the existing file is used.
        end_date {str}: Date in the ISO 8601 format *with hour* YYYY-MM-DDTHH.
        convert_date {callable}: Applied to ``start_date`` and ``end_date``
            before they are handed to the Binance client.

    Returns:
        False when appending and ``start_date`` is not earlier than
        ``end_date`` (nothing is downloaded); otherwise None.

    Raises:
        ValueError: If ``get_flag`` is neither "collect" nor "append".
    """
    date_format = "%Y-%m-%dT%H"

    ## set up
    with open("config.json") as jfile:
        config = json.load(jfile)

    fname = "{}.csv".format(name.replace(" ", "-"))
    if path == "":
        path = os.path.join(config["data"]["base_path"], "coins/")
    if get_flag == "":
        get_flag = config["data"]["coins_get"]

    if get_flag == "collect":
        write_status = "w+"
        if start_date == "":
            # Same YYYY-MM-DDTHH format that every other code path passes to
            # convert_date.
            start_date = "2015-05-01T00"
    elif get_flag == "append":
        write_status = "a+"
        if start_date != "":
            print(
                "Warning the start_date should be left blank for appending so that the last end_date is used."
            )
        else:
            # Resume from the last row already stored.
            # NOTE(review): this reads from the config base path even when a
            # custom ``path`` is given, while the rows are appended under
            # ``path`` -- confirm this is intended.
            coin_path = os.path.join(config["data"]["base_path"],
                                     "coins/{}".format(fname))
            coindf = pd.read_csv(coin_path)
            # "YYYY-MM-DD HH:MM:SS" -> "YYYY-MM-DDTHH"
            last_open = coindf["open_time_iso"].iloc[-1]
            start_date = last_open[:-6].replace(" ", "T")
            print(start_date)
        if datetime.strptime(start_date, date_format) >= datetime.strptime(
                end_date, date_format):
            print("Data acquisition already satisfied")
            return False
    else:
        raise ValueError(
            "get_flag must be 'collect' or 'append', got {!r}".format(
                get_flag))

    print("Getting data for Coin: {}, Pair: {}, dates: {} to {}".format(
        name, pair, start_date, end_date))
    start_date, end_date = convert_date(start_date), convert_date(end_date)
    print(start_date, end_date)

    client = Client()
    klines = client.get_historical_klines(pair, Client.KLINE_INTERVAL_1HOUR,
                                          start_date, end_date)

    save_path = os.path.join(path, fname)
    print("Got data. Now {}ing...".format(get_flag))
    with open(save_path, write_status) as f:
        # Only a freshly created file needs the header row.
        if get_flag == "collect":
            f.write(
                "open_time_iso,open_time_unix,open,high,low,close,volume,close_time,number_of_trades\n"
            )
        for kline in klines:
            # kline[0] is in epoch milliseconds; fromtimestamp renders it in
            # the machine's local time zone.
            open_time_iso = datetime.fromtimestamp(
                kline[0] / 1000).isoformat().replace("T", " ")
            f.write("{},{},{},{},{},{},{},{},{}\n".format(
                open_time_iso, kline[0], kline[1], kline[2], kline[3],
                kline[4], kline[5], kline[6], kline[8]))
    print("Data saved here {}.".format(save_path))
def get_historical_data(*, client, symbol, interval,
                        start_str='2019-01-01 00:00:00'):
    """
    Return historical pricing data from the Binance Exchange.

    The last row returned by the client is dropped before processing
    (presumably because the most recent candle is still open -- TODO
    confirm), rows are sorted by open time, and timestamps are snapped to
    the minute with ``hh:59`` rolled forward to the next full hour.

    Parameters:
        client (binance.client.Client): An open connected client to the
            Binance Exchange API.
        symbol (str): A Crypto-Pair symbol.
        interval (str): An OHLC candlestick width.
        start_str (str): Start of the requested history, passed through to
            ``client.get_historical_klines``. Defaults to
            '2019-01-01 00:00:00'.

    Returns:
        pandas.core.frame.DataFrame: Historical pricing data for the given
        symbol and interval with the columns ``open_time, open, high, low,
        close, volume, close_time, number_trades, symbol, width``.
    """
    raw = client.get_historical_klines(
        symbol=symbol,
        interval=interval,
        start_str=start_str
    )

    unused_columns = [
        'quote_asset_volume',
        'taker_base_asset_volume',
        'taker_quote_asset_volume',
        'ignore',
    ]
    df = pandas.DataFrame(
        data=raw[:-1],
        columns=[
            'open_time',
            'open',
            'high',
            'low',
            'close',
            'volume',
            'close_time',
            'quote_asset_volume',
            'number_trades',
            'taker_base_asset_volume',
            'taker_quote_asset_volume',
            'ignore',
        ]
    ).drop(
        columns=unused_columns
    ).sort_values(
        by=['open_time']
    ).reset_index(
        drop=True
    )

    df['symbol'] = symbol
    df['width'] = interval

    def format_hour(timestamp):
        # Drop seconds and sub-second precision.
        timestamp = timestamp.replace(second=0, microsecond=0)
        # A close time such as hh:59:59.999 is now hh:59:00; adding one
        # minute lands exactly on the next full hour, so no further minute
        # adjustment is needed.
        # NOTE(review): this also rolls forward any *open* time whose minute
        # is 59, which would matter for sub-hour intervals -- confirm only
        # hourly-aligned widths are used.
        if timestamp.minute == 59:
            timestamp += pandas.Timedelta(minutes=1)
        return timestamp

    for column in ('open_time', 'close_time'):
        df[column] = pandas.to_datetime(df[column], unit='ms').map(format_hour)

    for column in ('open', 'high', 'low', 'close', 'volume', 'number_trades'):
        df[column] = df[column].astype(float)

    return df
def getHistorical():
    """Return the ETHUSDT klines for the last two weeks.

    Uses the module-level ``client`` and ``timeframe``; the result of
    ``client.get_historical_klines`` is returned unchanged.
    """
    # earliestPossible = client._get_earliest_valid_timestamp("ETHUSDT", timeframe)
    return client.get_historical_klines("ETHUSDT", timeframe,
                                        "2 weeks ago UTC")