def import_data(self, doc_type_l=("Tick", "Transaction", "Order")):
    """Re-import raw data files into the Arctic libraries.

    :param doc_type_l: iterable of document-type names to import
        (default: Tick, Transaction, Order).  Fixed: the original used a
        mutable list as the default argument, which is shared across calls.
    """
    # clear all the old files
    for fp in LOG_FILE_PATH, DATA_INFO_PATH, DATA_FINISH_INFO_PATH, DATA_PATH:
        self._get_empty_folder(fp)

    arc = Arctic(ARCTIC_SRV)
    for doc_type in DOC_TYPE:
        # arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
        arc.set_quota(get_library_name(doc_type), MAX_SIZE)
    arc.reset()

    for doc_type in doc_type_l:
        # distinct dates for which raw files of this doc type exist
        date_list = sorted({
            int(path.split("_")[0])
            for path in os.listdir(DATABASE_PATH) if doc_type in path
        })
        date_list = [str(date) for date in date_list]

        # read the stock universe; "with" guarantees the handle is closed
        # (the original left the file object open on exceptions)
        with open(ALL_STOCK_PATH, "r") as f:
            stock_name_list = [line.split("\t")[0] for line in f.readlines()]
        stock_name_dict = {
            "SH": [name[2:] for name in stock_name_list if "SH" in name],
            "SZ": [name[2:] for name in stock_name_list if "SZ" in name],
        }

        lib_name = get_library_name(doc_type)
        a = Arctic(ARCTIC_SRV)
        # a.initialize_library(lib_name, lib_type=CHUNK_STORE)
        stock_name_exist = a[lib_name].list_symbols()
        lib = a[lib_name]
        initialize_count = 0
        for stock_name in stock_name_list:
            if stock_name not in stock_name_exist:
                initialize_count += 1
                # A placeholder for stocks
                pdf = pd.DataFrame(index=[pd.Timestamp("1900-01-01")])
                pdf.index.name = "date"  # a column named date is necessary
                lib.write(stock_name, pdf)
        print("initialize count: {}".format(initialize_count))
        print("tasks: {}".format(date_list))
        a.reset()

        date_list = ["20201231"]  # for test
        Parallel(n_jobs=min(2, len(date_list)))(
            delayed(add_data)(date, doc_type, stock_name_dict)
            for date in date_list)
def add_one_stock_daily_data_wrapper(filepath, type, exchange_place, index, date):
    """Import one stock's daily csv, logging timestamp errors and failures.

    :param filepath: path of the per-stock csv file to import
    :param type: document-type string (parameter name kept for backward
        compatibility even though it shadows the builtin)
    :param exchange_place: exchange prefix, e.g. "SH" or "SZ"
    :param index: position of this file in the batch (progress output)
    :param date: trading date being imported
    """
    pid = os.getpid()
    code = os.path.split(filepath)[-1].split(".csv")[0]
    arc = Arctic(ARCTIC_SRV)
    try:
        if index % 100 == 0:
            print("index = {}, filepath = {}".format(index, filepath))
        error_index_list = add_one_stock_daily_data(filepath, type, exchange_place, arc, date)
        if error_index_list is not None and len(error_index_list) > 0:
            # Fixed: log files are now opened with "with" so the handle is
            # closed even when a write raises (original leaked on error).
            log_path = os.path.join(
                LOG_FILE_PATH,
                "temp_timestamp_error_{0}_{1}_{2}.txt".format(pid, date, type))
            with open(log_path, "a+") as f:
                f.write("{}, {}, {}\n".format(
                    filepath, error_index_list, exchange_place + "_" + code))
    except Exception as e:
        info = traceback.format_exc()
        print("error:" + str(e))
        fail_path = os.path.join(
            LOG_FILE_PATH, "temp_fail_{0}_{1}_{2}.txt".format(pid, date, type))
        with open(fail_path, "a+") as f:
            f.write("fail:" + str(filepath) + "\n" + str(e) + "\n" + str(info) + "\n")
    finally:
        # always release the Arctic connection, success or failure
        arc.reset()
def __init__(self, use_arctic=False, z_score=True, alpha=None):
    """Simulator constructor.

    :param use_arctic: if True, try to open an Arctic connection;
        otherwise no connection is attempted
    :param z_score: if True, normalize data with z-score, else min-max
    :param alpha: smoothing factor forwarded to ``load_ema``
    """
    self._scaler = StandardScaler() if z_score else MinMaxScaler()
    self.cwd = os.path.dirname(os.path.realpath(__file__))
    self.ema = load_ema(alpha=alpha)
    if not use_arctic:
        # no connection requested
        self.arctic = self.library = None
        return
    try:
        print('Attempting to connect to Arctic...')
        self.arctic = Arctic(MONGO_ENDPOINT)
        self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.library = self.arctic[ARCTIC_NAME]
        print('Connected to Arctic.')
    except Exception as ex:
        # degrade gracefully when the database is unreachable
        self.arctic = self.library = None
        print('Unable to connect to Arctic database')
        print(ex)
def __init__(self, start_t=date(2010, 1, 1), end_t=date(2016, 12, 31)):
    """Load market data for [start_t, end_t], using a local pickle cache.

    Reading from pkl is handled by another class.  The cache path is
    currently derived only from start/end; TODO: hash FEATURES into the
    key too, and eventually keep a single pickle of the full dataset.

    :param start_t: first date of the data window (inclusive)
    :param end_t: last date of the data window (inclusive)
    """
    # Fixed: the original mixed start_t.isocalendar() (a tuple) with
    # end_t.isoformat() in the filename; both now use isoformat() so the
    # cache name is a consistent, readable date pair.
    cache_file_path = os.path.join(
        "/tmp",
        f"TestTSDataGenerator_{start_t.isoformat()}_{end_t.isoformat()}.pkl")
    self.original_data: pd.DataFrame = None
    self.df_x_to_loop: pd.DataFrame = None
    self.df_y_to_loop: pd.DataFrame = None
    if os.path.isfile(cache_file_path):
        self.original_data = pd.read_pickle(cache_file_path, compression="gzip")
    else:
        # read from arctic, then cache locally for subsequent runs
        arctic_store = Arctic(get_mongo_admin_conn_str())
        lib_name = "jy_chn_equity_otvn_chunkstore"
        lib_chunk_store = arctic_store[lib_name]
        symbol_name = "mkt_data"
        self.original_data = lib_chunk_store.read(
            symbol_name,
            chunk_range=pd.date_range(start_t, end_t),
            filter_data=True,
            columns=self.FEATURES)
        self.original_data.to_pickle(cache_file_path, compression="gzip", protocol=4)
def sync():
    """Fetch candle data and persist it to the local Arctic 'Bitmex' library."""
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    candles = get_candle_pandas()
    print (candles)
    # VersionStore write, tagged with its origin
    store['Bitmex'].write('XBTUSD', candles, metadata={'source': 'Bitmex'})
def show():
    """Read XBTUSD candles from local Arctic and plot 10/30-day rolling vol."""
    from math import sqrt

    # Connect to local MongoDB and open the default VersionStore library
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']

    # Read the stored candles together with their metadata
    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print (xbtusd)
    print (metadata)

    # simple returns, then annualized rolling standard deviations
    xbtusd['ret'] = -1 + xbtusd['close'] / xbtusd['close'].shift(1)
    xbtusd['vol10'] = sqrt(260) * xbtusd['ret'].rolling(10).std(ddof=0)
    xbtusd['vol30'] = sqrt(260) * xbtusd['ret'].rolling(30).std(ddof=0)

    plt.plot(xbtusd.index, xbtusd['vol10'], label='vol10')
    plt.plot(xbtusd.index, xbtusd['vol30'], label='vol30')
    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)
    plt.show()
def __init__(self, TYPE):
    """
    :param TYPE: one of 'BS', 'IS', 'CF'
    """
    ############ SETTING #############
    self.config = GetConfig()
    self.TYPE = TYPE  # 'BS', 'IS', 'CF'
    self.MONGO = self.config.MONGO
    self.CSV = self.config.CSV
    self.RAW = False
    self.outdir = self.config.fund_dir
    self.encode = self.config.encode
    self.proxypool = self.config.proxypool
    ############ CHANGE ABOVE SETTING #############
    if self.MONGO:
        from arctic import Arctic
        # mongod --dbpath D:/idwzx/project/arctic
        conn = Arctic(self.config.ahost)
        lib_name = 'ashare_{}'.format(self.TYPE)
        conn.initialize_library(lib_name)
        self.lib = conn[lib_name]
    self.result_dict = {}
def __init__(self, save_to_file: bool = True, host: str = 'localhost',
             library: str = 'tick_blotter',
             collection: str = 'test_blotter') -> None:
    """Open (creating when absent) the TickStore library used for blotting."""
    arctic_conn = Arctic(host)
    arctic_conn.initialize_library(library, lib_type=TICK_STORE)
    self.db = arctic_conn
    self.store = arctic_conn[library]
    self.collection = collection
def __init__(self, use_arctic=False):
    """
    :param use_arctic: if True, Simulator creates a connection to Arctic;
        otherwise no connection is attempted
    """
    self._avg = None
    self._std = None
    self.cwd = os.path.dirname(os.path.realpath(__file__))
    # normalization is deferred until _avg/_std have been populated
    self.z_score = lambda x: (x - self._avg) / self._std
    try:
        if not use_arctic:
            print('Not connecting to Arctic')
            self.arctic, self.library = None, None
        else:
            print('Attempting to connect to Arctic...')
            self.arctic = Arctic(MONGO_ENDPOINT)
            self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.library = self.arctic[ARCTIC_NAME]
            print('Connected to Arctic.')
    except Exception as ex:
        # fall back to offline mode when the database is unreachable
        self.arctic, self.library = None, None
        print('Unable to connect to Arctic database')
        print(ex)
def __init__(self, stocks=('QQQ', 'TQQQ', 'SQQQ'), db='sqlite',
             storage_dir=None, db_file='stock_data.sqlite'):
    """Set up local storage (sqlite or arctic) for downloaded stock data.

    :param stocks: tickers to track.  Fixed: the original default was a
        mutable list shared across instances; a tuple default is safe and
        is copied into a fresh list per instance.
    :param db: backend selector, 'sqlite' or 'arctic'
    :param storage_dir: data directory; defaults to ~/.yfinance_data
    :param db_file: sqlite database filename
    """
    self.db = db
    if storage_dir is None:
        home_dir = os.path.expanduser("~")
        self.storage_dir = os.path.join(home_dir, '.yfinance_data')
        # exist_ok avoids the check-then-create race of the original
        os.makedirs(self.storage_dir, exist_ok=True)
    else:
        self.storage_dir = storage_dir

    if db == 'sqlite':
        self.db_file = db_file
        # 4 slashes for absolute path: https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite
        self.e = create_engine('sqlite:///{}/{}'.format(self.storage_dir, self.db_file))
        self.con = self.e.connect()
    else:
        self.e = None
        self.con = None

    if db == 'arctic':
        self.store = Arctic('localhost')
        self.store.initialize_library('yfinance_stockdata')
        self.library = self.store['yfinance_stockdata']

    # fresh list per instance; callers may append without cross-talk
    self.stocks = list(stocks)
def __init__(self, collection_name, database_name=None, host=None, port=None):
    """Open (or create) the Arctic library backing this collection."""
    database_name, host, _port_not_used = mongo_defaults(
        db=database_name, host=host, port=port)

    # Arctic doesn't accept a port
    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host
    self.store = store
    self.library_name = library_name
    self.library = store[library_name]
def getArcticLibraries():
    """Return the list of libraries available in the local Arctic store.

    Fixed: two status messages used Python 2 ``print`` statements, which
    are syntax errors under Python 3; they are now print() calls.
    """
    print(">>> Function called getArcticLibraries()")
    # Connect to local MONGODB
    store = Arctic('localhost')
    print("+ Arctic connected to MongoDB at localhost")
    print("+ Requesting libraries from Arctic store")
    return store.list_libraries()
def __init__(self):
    """Connect to Arctic and prepare a pool of worker processes."""
    try:
        print('Attempting to connect to Arctic...')
        self.scaler = MinMaxScaler()
        self.arctic = Arctic(MONGO_ENDPOINT)
        self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.library = self.arctic[ARCTIC_NAME]
        # column renaming applied to raw reference data
        self.reference_data_old_names = [
            'system_time', 'day', 'coinbase_midpoint'
        ]
        self.reference_data_new_names = ['t', 'd', 'm']
        # one worker per CPU core, with bounded queues for back-pressure
        self.number_of_workers = cpu_count()
        self.queue = Queue(maxsize=self.number_of_workers)
        self.return_queue = Queue(maxsize=self.number_of_workers)
        self.workers = []
        for num in range(self.number_of_workers):
            self.workers.append(
                Process(name='Process-%i' % num,
                        target=self._do_work,
                        args=(self.queue, self.return_queue)))
        print('Connected to Arctic.')
    except Exception as ex:
        # offline fallback: no database, no workers
        self.arctic, self.library, self.workers = None, None, None
        print('Unable to connect to Arctic database')
        print(ex)
def getSubsetFromArcticStore(tickerName, arcticLibraryName, start_date, end_date):
    """Read ``tickerName`` from an Arctic library over [start_date, end_date].

    Returns the DataFrame on success; returns None (after printing an
    error) when no data is returned or the symbol is missing, matching
    the original contract.

    Fixed: Python 2 ``print`` statements and the ``except Exception, e``
    clause were syntax errors under Python 3.
    """
    print(">>> Function called getSubsetFromArcticStore(" + tickerName + ", " +
          arcticLibraryName + ")")
    # Connect to local MONGODB
    source = 'localhost'
    store = Arctic(source)
    print("+ Arctic connected to MongoDB at " + source)
    # Access the library
    library = store[arcticLibraryName]
    # Reading the data
    try:
        print("+ Arctic reading " + tickerName + " from " + arcticLibraryName)
        item = library.read(tickerName, date_range=DateRange(start_date, end_date))
        if (isEmptyDataFrame(item.data)):
            print("! Error no data returned for " + tickerName)
        else:
            return item.data
    except Exception as e:
        print("! Error " + tickerName + " not found in " + arcticLibraryName +
              ": " + str(e))
def save_to_arctic(self):
    """Pivot close prices into one series per symbol and write each to Arctic.

    see https://github.com/manahl/arctic/blob/master/howtos/201507_demo_pydata.py
    """
    arctic_store = Arctic(get_mongo_admin_conn_str())
    closeprice_lib = arctic_store["jy_equity_closeprice"]

    # wide table: rows are timestamps ("t"), one column per symbol ("o")
    raw = self.load_all_close_price()
    wide = raw.pivot_table(values="close_price", index="t", columns="o",
                           aggfunc=np.mean)

    i = 0
    for col in wide.columns:
        series = wide.loc[:, col].dropna(axis=0)
        closeprice_lib.write(col, series)
        i += 1
        print(f"{i}:{col}")
    print(closeprice_lib.list_symbols())
def save_to_chunkstore_per_symbol(self):
    """Rebuild the jy_equity_mkt_data chunkstore, one daily-chunked series
    per symbol."""
    lib_name = "jy_equity_mkt_data"
    store = Arctic(get_mongo_admin_conn_str())
    # start from a clean library every run
    store.delete_library(lib_name)
    store.initialize_library(lib_name, lib_type=CHUNK_STORE)
    chunk_lib = store[lib_name]

    # wide table: one column per symbol, indexed by date
    wide = self.load_all_close_price().pivot_table(
        values="close_price", index="t", columns="o", aggfunc=np.mean)
    wide.index.rename("date", inplace=True)

    i = 0
    for col in wide.columns:
        series = wide.loc[:, col].dropna(axis=0)
        chunk_lib.write(col, series, chunker=DateChunker(), chunk_size="D")
        i += 1
        if i % 2 == 0:
            print(f"{i}:{col}")
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    # Create Arctic and directly fork/start children (no wait).
    # This exercises the race between parent-side library initialization
    # and children that start writing immediately after fork.
    total_iterations = 12
    total_processes = 6
    total_writes_per_child = 20
    global MY_ARCTIC
    for i in range(total_iterations):
        processes = list()
        # fresh connection each iteration; children inherit it at fork time
        MY_ARCTIC = Arctic(mongo_host=mongo_host)
        for j in range(total_processes):
            p = Process(target=f, args=(library_name, total_writes_per_child, False))
            p.start(
            )  # start directly, don't wait to create first all children procs
            processes.append(p)
        MY_ARCTIC.initialize_library(
            library_name, VERSION_STORE)  # this will unblock spinning children
        for p in processes:
            p.join()
        for p in processes:
            # exitcode 0 means the child completed all writes without error
            assert p.exitcode == 0
        MY_ARCTIC.reset()
    # after all iterations the library must still resolve to a VersionStore
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
def test_multiprocessing_safety(mongo_host, library_name):
    # Create/initialize library at the parent process, then spawn children,
    # and start them aligned in time
    total_processes = 64
    total_writes_per_child = 100
    register_get_auth_hook(my_auth_hook)
    global MY_ARCTIC
    MY_ARCTIC = Arctic(mongo_host=mongo_host)
    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    processes = []
    for _ in range(total_processes):
        processes.append(
            Process(target=f, args=(library_name, total_writes_per_child, True)))
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    for p in processes:
        assert p.exitcode == 0
    # the library must still be a healthy VersionStore after all writes
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
def open(self, store='chunkstore'):
    """Open (creating on first use) an Arctic chunkstore library and set its quota.

    :param store: Arctic library name to open
    """
    self.db = Arctic('localhost')
    try:
        self.store = self.db[store]
    except Exception:
        # Fixed: was a bare ``except:``, which also swallows
        # KeyboardInterrupt/SystemExit.  On any lookup failure the
        # library is created lazily and the lookup retried once.
        self.db.initialize_library(store, lib_type=CHUNK_STORE)
        self.store = self.db[store]
    # quota is in bytes; maxDBStorage is expressed in GB
    self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)
def arctic(mongo_server_module):
    """Fixture: a fresh Arctic instance backed by a wiped mongo server."""
    from arctic import Arctic

    disable_arctic_cache(mongo_server_module.api)
    # drop both the shared and the per-user arctic databases so every
    # session starts from a clean slate
    mongo_server_module.api.drop_database("arctic")
    mongo_server_module.api.drop_database("arctic_{}".format(getpass.getuser()))
    return Arctic(mongo_server_module.api)
def show_chunk_store_info(self):
    """Print symbols, info and chunk ranges of the jy_otv_chunkstore library."""
    store = Arctic(get_mongo_admin_conn_str())
    lib = store["jy_otv_chunkstore"]
    print("list_symbols")
    print(lib.list_symbols())
    print("get_info")
    print(lib.get_info("close_price"))
    print("chunk_ranges")
    print(list(lib.get_chunk_ranges("close_price")))
def __init__(self, host):
    """Connect to Arctic at ``host``.

    Initializes the store here if it hasn't already.
    """
    # registration raises if the type is already registered; that is the
    # expected steady state, so the failure is deliberately ignored
    try:
        register_library_type(FunStore._LIBRARY_TYPE, FunStore)
    except Exception:
        pass
    self.store = Arctic(host)
def build_db(self, n_episodes, fld):
    """Build the price database and persist it under ``fld``.

    The final db structure is a list; each element pairs one price
    sequence with a string describing that sequence (its index and the
    date range it covers).

    :param n_episodes: requested number of episodes (capped by data length)
    :param fld: output folder, recreated from scratch
    :return: None (writes db.pickle and param.json into ``fld``)
    """
    store = Arctic('localhost')
    library = store['EOD_PASSTHROUGH']
    # 'ac' column of symbol sz002024 — presumably adjusted close; TODO confirm
    self.quote = library.read("sz002024")['ac']
    '''
    Slice the quotes according to n_episodes and self.window_episode,
    keeping an integral number of window_episode-sized windows.
    '''
    episode = len(self.quote.index) // self.window_episode
    if (n_episodes > episode):
        # cap the request at what the data can supply
        n_episodes = episode
    self.quote = self.quote[-n_episodes * self.window_episode:]
    db = []
    for i in range(n_episodes):
        #prices, title = self.sample()
        prices = []
        p = []
        # one contiguous window of window_episode quotes
        part = self.quote[i * self.window_episode:((i + 1) * self.window_episode)]
        # label each window with its covering date range
        idx = part.index[0].strftime(
            '%Y-%m-%d') + "~" + part.index[-1].strftime('%Y-%m-%d')
        '''
        while True:
            p = np.append(p, self.quote(full_episode=False)[0])
            if len(p) > self.window_episode:
                break
        base, base_title = self.__rand_sin(
            period_range=self.base_period_range,
            amplitude_range=self.base_amplitude_range,
            noise_amplitude_ratio=0., full_episode=True)
        '''
        # p = [x for x in part.values]
        prices.append(np.array(part.values))
        # return np.array(prices).T,
        db.append((np.array(prices).T, '[%i]_' % i + idx))
    print(db)
    # recreate the output folder from scratch
    if os.path.exists(fld):
        shutil.rmtree(fld)
    os.makedirs(fld)
    # os.makedirs() # don't overwrite existing fld
    pickle.dump(db, open(os.path.join(fld, 'db.pickle'), 'wb'))
    # persist the generation parameters alongside the data
    param = {'n_episodes': n_episodes}
    for k in self.attrs:
        param[k] = getattr(self, k)
    json.dump(param, open(os.path.join(fld, 'param.json'), 'w'))
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    """Load a trades csv and write it into an Arctic VersionStore library.

    :param filename: path of the csv file; columns per COLUMN_NAMES, with
        epoch-seconds in the ``time`` column
    :param library: Arctic library name (created when missing)
    :param symbol: symbol under which the DataFrame is stored
    """
    store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")
    # Defaults to VersionStore
    store.initialize_library(library)
    # Fixed: the original rebound the ``library`` (str) parameter to the
    # library object; a distinct name keeps the types unambiguous.
    lib = store[library]
    df = pd.read_csv(filename, names=COLUMN_NAMES)
    df.time = pd.to_datetime(df.time, unit='s')
    lib.write(symbol, df, metadata={'source': 'csv'})
def initialize_db(self, lib):
    """Connect to the tick library ``lib``, creating it when absent."""
    if not lib:
        # nothing to do without a library name
        return
    db = Arctic('localhost')
    if lib not in db.list_libraries():
        self.logger.info(
            'Data library \'%s\' does not exist -- creating it', lib)
        db.initialize_library(lib, lib_type=TICK_STORE)
    self.dlib = lib
    self.dbconn = db[lib]
def init():
    """Open the guizhou mongo collection and the bert2vec Arctic library.

    Runs locally; host 60 exposes no external port.
    """
    client = pymongo.MongoClient(host='localhost', port=27018)
    coll = client.bert2vec.guizhou
    library = Arctic('localhost')['bert2vec']
    return coll, library
def append_random_rows(config, args, n_rows):
    """Append synthetic one-minute datasets to every benchmark symbol.

    :param n_rows: passed as the column count of each generated frame
        (the row count is fixed at APPEND_NROWS)
    """
    store = Arctic(args.mongodb, app_name="benchmark")
    lib = store[lib_name_from_args(config)]
    for _ in range(args.appends):
        for sym in range(args.symbols):
            frame = gen_oneminute_dataset(n_row=APPEND_NROWS, n_col=n_rows,
                                          dense=False)
            lib.append('sym' + str(sym), frame)
def insert_random_data(config, args, n_rows):
    """Recreate the benchmark library and write one synthetic dataset per symbol."""
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)
    # wipe and re-create so every run starts from an empty library
    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]
    for sym in range(args.symbols):
        frame = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows,
                                      dense=args.dense)
        lib.write('sym' + str(sym), frame)
def __init__(self, connection_string=None):
    """Open an Arctic store.

    :param connection_string: mongo host/connection string; defaults to
        'localhost' when not supplied.

    Fixed: the fallback branch assigned the constant 'localhost' and then
    tested it for None — dead code that could raise a ValueError with an
    empty message; the unreachable branch has been removed.
    """
    if connection_string is not None:
        self.connection_string = connection_string
    else:
        self.connection_string = "localhost"
    self.store = Arctic(self.connection_string)
def __init__(self, hub: Hub, exchange: Exchange, market: Market,
             date_range: Optional[DateRange] = None) -> None:
    """Bind this processor to one exchange/market library in local Arctic."""
    self.library_name = str(exchange)
    self.symbol = str(market)
    arctic_store = Arctic('localhost')
    self.store = arctic_store
    self.library = arctic_store[self.library_name]
    self.date_range = date_range
    self.publisher = Publisher(hub, prefix='arctic_trade_processor')