class TimeSuiteWrite(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        self.lib.write('series_bench_compressible', s_compress[idx])
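# The benchmark classes in this suite (TimeSuiteWrite above, TimeSuiteAppend
# below) assume module-level fixtures roughly like these. This is a
# hypothetical sketch inferred from the '5K * 10^' param name; the real sizes
# and data live elsewhere in the benchmark module.
import numpy as np
import pandas as pd
from arctic import Arctic

TEST_SIZES = [5000 * 10 ** i for i in range(3)]
# random data compresses poorly; constant data compresses well
df_random = [pd.DataFrame(np.random.randn(n, 10)) for n in TEST_SIZES]
s_random = [pd.Series(np.random.randn(n)) for n in TEST_SIZES]
df_compress = [pd.DataFrame(np.ones((n, 10))) for n in TEST_SIZES]
s_compress = [pd.Series(np.ones(n)) for n in TEST_SIZES]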
def save_to_chunkstore_per_symbol(self):
    lib_name = "jy_equity_mkt_data"
    arctic_store = Arctic(get_mongo_admin_conn_str())
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
    lib_chunk_store = arctic_store[lib_name]

    df = self.load_all_close_price()
    df2 = df.pivot_table(values="close_price", index="t", columns="o", aggfunc=np.mean)
    df2.index.rename("date", inplace=True)

    # one symbol per column, chunked by day
    for i, col in enumerate(df2.columns, start=1):
        df3 = df2.loc[:, col].dropna(axis=0)
        lib_chunk_store.write(col, df3, chunker=DateChunker(), chunk_size="D")
        if i % 2 == 0:
            print(f"{i}:{col}")
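# A minimal read-back sketch for the per-symbol layout above. The symbol name
# "600000" is a hypothetical example; ChunkStore.read accepts a chunk_range
# (here a DateRange), so only the daily chunks overlapping the requested
# window are fetched from MongoDB.
from datetime import date

from arctic import Arctic
from arctic.date import DateRange

store = Arctic(get_mongo_admin_conn_str())  # same connection helper as above
lib = store["jy_equity_mkt_data"]
series = lib.read("600000", chunk_range=DateRange(date(2019, 1, 1), date(2019, 3, 31)))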
def insert_random_data(config, args, n_rows):
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)

    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]

    for sym in range(args.symbols):
        df = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        lib.write('sym' + str(sym), df)
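# A hypothetical sketch of the gen_oneminute_dataset helper used above (the
# real generator lives elsewhere in the benchmark): one-minute timestamps,
# n_col random float columns, masked to a sparse frame when dense=False.
import numpy as np
import pandas as pd

def gen_oneminute_dataset(n_row, n_col, dense):
    index = pd.date_range('2020-01-01', periods=n_row, freq='T')
    data = np.random.randn(n_row, n_col)
    if not dense:
        data[np.random.rand(n_row, n_col) < 0.9] = np.nan  # ~90% missing
    return pd.DataFrame(data, index=index, columns=[f'c{i}' for i in range(n_col)])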
def convert_mkt_history_data(self):
    arctic_store = Arctic(get_mongo_admin_conn_str())
    lib_name = "jy_chn_equity_otvn_chunkstore"
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
    lib_chunk_store = arctic_store[lib_name]

    # The date ranges are hardcoded for now; there may be a more elegant way
    # to express this.
    for i, t_period in enumerate([(date(1990, 1, 1), date(2000, 1, 15)),
                                  (date(2000, 1, 15), date(2010, 1, 15)),
                                  (date(2010, 1, 15), date(2020, 1, 1))]):
        # Benchmarking showed that for daily data, a chunk_size of "M" gives
        # the best overall write and read performance.
        self._convert_period_equity_mkt_data_to_arctic(
            t_period[0], t_period[1], lib_chunk_store, "mkt_data", "M", i == 0)
class TimeSuiteAppend(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']
        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1'):
    # default HDF5 format
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if engine == 'bcolz':
        # convert invalid characters to substitutes (which Bcolz can't deal with)
        pass
    elif engine == 'arctic':
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 10 * 1000
        fname = os.path.basename(fname).replace('.', '_')

        self.logger.info('Load MongoDB library: ' + fname)

        c = pymongo.MongoClient(db_server, connect=False)
        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS)
        store.delete_library(fname)
        c.close()

        self.logger.info("Deleted MongoDB library: " + fname)
    elif engine == 'hdf5':
        h5_filename = self.get_h5_filename(fname)

        # delete the old copy
        try:
            os.remove(h5_filename)
        except OSError:
            pass
def save_to_arctic_tickstore(self):
    # NOTE: this does not work as-is
    arctic_store = Arctic(get_mongo_admin_conn_str())
    arctic_store.delete_library("jy_otv_tickstore")
    arctic_store.initialize_library("jy_otv_tickstore", lib_type=TICK_STORE)
    lib_tick_store = arctic_store["jy_otv_tickstore"]

    df = self.load_all_close_price()
    df2 = df.pivot_table(values="close_price", index="t", columns="o", aggfunc=np.mean)
    # TickStore requires a timezone-aware index
    df2.index = df2.index.tz_localize("Asia/Shanghai")
    lib_tick_store.write("close_price", df2)
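# A minimal TickStore sketch that does write successfully (an assumption, not
# the author's fix for the snippet above): TickStore also accepts a list of
# dicts, each carrying a timezone-aware 'index' entry. The symbol name
# "close_price_ticks" is hypothetical.
from datetime import datetime, timedelta

import pytz

ticks = [{'index': datetime(2020, 1, 1, 9, 30, tzinfo=pytz.utc) + timedelta(minutes=i),
          'close_price': 100.0 + i}
         for i in range(10)]
lib_tick_store.write("close_price_ticks", ticks)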
def save_to_chunkstore(self):
    arctic_store = Arctic(get_mongo_admin_conn_str())
    arctic_store.delete_library("jy_otv_chunkstore")
    arctic_store.initialize_library("jy_otv_chunkstore", lib_type=CHUNK_STORE)
    lib_chunk_store = arctic_store["jy_otv_chunkstore"]

    df = self.load_all_close_price()
    df.sort_index(axis=0, ascending=True, inplace=True)
    print(df)

    start = time.time()
    lib_chunk_store.write("close_price", df, chunk_size="M")
    print(f"total write time {time.time() - start}")
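# A follow-up sketch, assuming the "jy_otv_chunkstore" library written above:
# get_chunk_ranges lists each monthly chunk's start/end dates, and update()
# rewrites only the chunks that new rows fall into. new_rows is a
# hypothetical DataFrame shaped like df.
for chunk_start, chunk_end in lib_chunk_store.get_chunk_ranges("close_price"):
    print(chunk_start, chunk_end)

# lib_chunk_store.update("close_price", new_rows)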
def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1',
                                     db_port='6379', timeout=10, username=None, password=None):
    if 'hdf5' in engine:
        engine = 'hdf5'

    if engine == 'bcolz':
        # convert invalid characters to substitutes (which Bcolz can't deal with)
        pass
    elif engine == 'redis':
        import redis

        fname = os.path.basename(fname).replace('.', '_')

        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout, socket_connect_timeout=timeout)

            if fname == 'flush_all_keys':
                r.flushall()
            else:
                # allow deletion of keys by pattern matching
                if "*" in fname:
                    x = r.keys(fname)
                    if len(x) > 0:
                        # delete() takes keys as varargs, not a list
                        r.delete(*x)

                r.delete(fname)
        except Exception as e:
            self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e))
    elif engine == 'arctic':
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000
        fname = os.path.basename(fname).replace('.', '_')

        self.logger.info('Load MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                connect=False)
        else:
            c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port),
                                    connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)
        store.delete_library(fname)
        c.close()

        self.logger.info("Deleted MongoDB library: " + fname)
    elif engine == 'hdf5':
        h5_filename = self.get_h5_filename(fname)

        # delete the old copy
        try:
            os.remove(h5_filename)
        except OSError:
            pass
def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # convert_objects() was removed from pandas; to_numeric is the replacement
    df = df.apply(pandas.to_numeric, errors='coerce')
    return df


################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic("localhost")

# Create a VersionStore library
arctic.delete_library("jblackburn.stocks")
arctic.initialize_library("jblackburn.stocks")
arctic.list_libraries()

stocks = arctic["jblackburn.stocks"]

# get some prices
aapl = get_stock_history("aapl", "2015-01-01", "2015-02-01")
aapl

# store them in the library
stocks.write("aapl", aapl, metadata={"source": "YAHOO"})
stocks.read("aapl").data["Adj Close"].plot()
stocks.read("aapl").metadata
stocks.read("aapl").version
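# A minimal versioning sketch against the "jblackburn.stocks" library above:
# each write creates a new version, and read() can pin one via as_of (a
# version number, a snapshot name, or a datetime).
stocks.write("aapl", aapl.iloc[:10])  # creates version 2
first = stocks.read("aapl", as_of=1)  # the original write
latest = stocks.read("aapl")          # newest version
print(first.version, latest.version)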
import os
import ssl

import keyring
import pandas as pd
from pymongo import MongoClient

# these imports were missing from the original script
from arctic import Arctic, CHUNK_STORE

if __name__ == "__main__":
    # client = MongoClient("localhost")
    client = MongoClient(keyring.get_password('atlas', 'connection_string'),
                         ssl_cert_reqs=ssl.CERT_NONE)

    a = Arctic(client)

    if a.library_exists('fund'):
        a.delete_library('fund')
    if a.library_exists('fund_adj'):
        a.delete_library('fund_adj')

    a.initialize_library('fund', CHUNK_STORE)
    a.initialize_library('fund_adj', CHUNK_STORE)
    fund = a['fund']
    fund_adj = a['fund_adj']

    local = Arctic('localhost')
    fund_local = local['fund']
    fund_adj_local = local['fund_adj']

    for symbol in fund_local.list_symbols():
        # copy each symbol from the local store to the remote one
        fund.write(symbol, fund_local.read(symbol))
class TestArcticBroker(unittest.TestCase):

    def setUp(self):
        self.robot_user = "******"
        self.import_comment = "importcomment"
        self.arctic = Arctic('localhost')
        self.library_name = marc.random_library(self.arctic)
        lu.logger.info("Created test library {}".format(self.library_name))
        self.arctic.initialize_library(self.library_name)
        simple_pd = pu.create_simple_series(['a', 'b', 'c'], 5)
        lib = self.arctic[self.library_name]
        import_pandas(lib, simple_pd, "symbol",
                      RevisionInfo(who=self.robot_user, what=self.import_comment,
                                   when=datetime.datetime.now()))
        import_pandas(lib, simple_pd, "ES.SETL.EOD",
                      RevisionInfo(who=self.robot_user, what="import something else",
                                   when=datetime.datetime.now()))

    def tearDown(self):
        self.arctic.delete_library(self.library_name)
        lu.logger.info("deleted test library {}".format(self.library_name))

    def test_checkout_and_checkin_arctic(self):
        url = "arctic:///{}/symbol".format(self.library_name)
        broker = ArcticBroker(self.arctic)
        matrix = broker.checkout(url)

        history = broker.history(url)
        self.assertEqual(1, len(history))
        self.assertEqual(self.robot_user, history[0].revision_info.who)
        self.assertEqual("1", history[0].id)
        self.assertEqual(self.import_comment, history[0].revision_info.what)

        num_rows_original_version_1 = len(matrix.content.index)
        new_row = pd.DataFrame(data=np.random.randn(1, len(matrix.content.columns)),
                               index=[matrix.content.index[-1] + datetime.timedelta(days=1)],
                               columns=matrix.content.columns)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement
        df = pd.concat([matrix.content, new_row])
        revision_info = RevisionInfo(who="Jeremy Ward", what="first test commit",
                                     when=datetime.datetime(year=2000, month=1, day=13))
        broker.commit(matrix.replace_content(df), revision_info)

        hist = broker.history(url)
        self.assertEqual(2, len(hist))
        self.assertEqual(hist[1].revision_info.what, "first test commit")
        self.assertEqual("2", hist[1].id)

        matrix = broker.checkout(url)
        num_rows_original_version_2 = len(matrix.content.index)
        self.assertEqual(1, num_rows_original_version_2 - num_rows_original_version_1)
        broker.release(matrix)

        original_version = broker.history(url)[0].id
        matrix = broker.checkout(url, original_version)
        self.assertEqual(num_rows_original_version_1, len(matrix.content.index))
        broker.release(matrix)

        version_2 = broker.history(url)[1].id
        matrix = broker.checkout(url, version=version_2)
        self.assertEqual(num_rows_original_version_2, len(matrix.content.index))
        broker.release(matrix)

        self.assertEqual(2, len(broker.history(url)))

    def test_list(self):
        broker = ArcticBroker(self.arctic)
        result = broker.list()
        self.assertEqual(2, len(result))
        self.assertEqual("{}/ES/SETL/EOD".format(self.library_name), result[0].path)

    def test_peek_with_existing_file(self):
        broker = ArcticBroker(self.arctic)
        url = "arctic:///{}/symbol".format(self.library_name)
        preview = broker.peek(url)
        todays_date = datetime.datetime.now().date()
        expected_start_date = todays_date - datetime.timedelta(5)
        expected_end_date = expected_start_date + datetime.timedelta(4)
        self.assertEqual(expected_start_date.strftime("%Y-%m-%d"),
                         preview.range_start.strftime("%Y-%m-%d"))
        self.assertEqual(expected_end_date.strftime("%Y-%m-%d"),
                         preview.range_end.strftime("%Y-%m-%d"))

    def test_peek_non_existing_file(self):
        broker = ArcticBroker(self.arctic)
        testurl = "arctic:///subdir_1/file_name_xxx.csv?format=CSV"
        preview = broker.peek(testurl)
        self.assertIsNone(preview)

    def test_with_qualified_ticker_name(self):
        url = "arctic:///{}/ES/SETL/EOD".format(self.library_name)
        broker = ArcticBroker(self.arctic)
        matrix = broker.checkout(url)
        self.assertEqual(matrix.matrix_header.path, "/{}/ES/SETL/EOD".format(self.library_name))
# Connect to local MongoDB
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('TESTE')

# Access the library
library = store['TESTE']

# Create some data
data = pd.DataFrame([1, 2], index=[3, 4], columns=['coluna1'])

# Store the data in the library
library.write('TestData', data)

# List libraries in the store
store.list_libraries()

# List symbols in the library
library.list_symbols()

# Read back the data
data_read = library.read('TestData').data

# Delete the data from the library
library.delete('TestData')

# Delete the library
store.delete_library('TESTE')
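# A short continuation sketch (it must run before the delete calls at the end
# of the walkthrough above, while 'TestData' still exists): VersionStore.append
# adds rows as a new version, and list_versions shows the symbol's history.
more = pd.DataFrame([3], index=[5], columns=['coluna1'])
library.append('TestData', more)
print(library.list_versions('TestData'))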
def delete_lib(store, lib_name):
    # delete the named library from the given Arctic store
    # (the parameter was renamed from Arctic, which shadowed the class name)
    store.delete_library(lib_name)
class TestMongoBroker(unittest.TestCase):

    def setUp(self):
        self.username = randomString()
        self.time_now = datetime.datetime.now()
        self.arctic = Arctic('localhost')
        self.library_name = marc.random_library(self.arctic)
        self.symbol_name = 'symbol'
        self.qualified_symbol_name = "ES.SETL.EOD"
        self.qualified_symbol_name_as_path = self.qualified_symbol_name.replace('.', '/')
        lu.logger.info("Created test library {}".format(self.library_name))
        self.arctic.initialize_library(self.library_name)
        simple_pd = pu.create_simple_series(['a', 'b', 'c'], 50)
        import_pandas(self.arctic[self.library_name], simple_pd, self.symbol_name,
                      RevisionInfo(who=self.username, what="init", when=self.time_now))
        import_pandas(self.arctic[self.library_name],
                      pu.create_simple_series(['a', 'b', 'c', 'd'], 75),
                      self.qualified_symbol_name,
                      RevisionInfo(who=self.username, what="init qualified", when=self.time_now))

    def tearDown(self):
        self.arctic.delete_library(self.library_name)
        lu.logger.info("deleted test library {}".format(self.library_name))

    def test_acquire_data(self):
        method = ArcticStorageMethod(self.arctic)
        result = method.acquireContent("{}/symbol".format(self.library_name), {})
        data_frame = result.content
        self.assertFalse(result.header.path[0] == '/')
        self.assertEqual(3, len(data_frame.columns))
        self.assertIsNone(result.header.description)
        self.assertEqual(result.header.name, result.header.path)

    def test_history(self):
        method = ArcticStorageMethod(self.arctic)
        url = MatrixUrl("{}/symbol".format(self.library_name))
        result = method.history(url)
        self.assertEqual(1, len(result))

    def test_acquire_data_with_qualified_symbol(self):
        method = ArcticStorageMethod(self.arctic)
        url = "{}/{}".format(self.library_name, self.qualified_symbol_name_as_path)
        result = method.acquireContent(url, {})
        data_frame = result.content
        self.assertEqual(4, len(data_frame.columns))

    def test_revision_storage(self):
        time_now = datetime.datetime.now()
        info = RevisionInfo(who="aPerson", what="a Change", when=time_now)
        metadata = {}
        add_revision_to_metadata(Revision("1", info), metadata)
        info2 = RevisionInfo(who="anotherPerson", what="anotherChange", when=time_now)
        add_revision_to_metadata(Revision("2", info2), metadata)

        revs = get_revisions_from_metadata(metadata)
        self.assertEqual(2, len(revs))

        revision = revs[0]
        self.assertEqual("1", revision.id)
        self.assertEqual("a Change", revision.revision_info.what)
        self.assertEqual("aPerson", revision.revision_info.who)
        self.assertEqual(time_now, revision.revision_info.when)

        revision = revs[1]
        self.assertEqual("2", revision.id)
        self.assertEqual("anotherChange", revision.revision_info.what)
        self.assertEqual("anotherPerson", revision.revision_info.who)
        self.assertEqual(time_now, revision.revision_info.when)

    def test_acquire_missing_data(self):
        method = ArcticStorageMethod(self.arctic)
        with self.assertRaisesRegex(StorageMethod.ResourceException,
                                    '^ticker nosymbol not found$'):
            method.acquireContent("{}/nosymbol".format(self.library_name), {})
        with self.assertRaisesRegex(StorageMethod.ResourceException,
                                    '^library nolib not found in library list'):
            method.acquireContent("nolib/nosymbol", {})