Exemple #1
0
class TimeSuiteWrite(object):
    """Benchmark suite timing Arctic VersionStore writes.

    Naming follows the airspeed-velocity (asv) convention: ``time_*``
    methods are timed, ``setup``/``teardown`` bracket each run, and the
    parameter index selects a size from the module-level TEST_SIZES.
    The written data comes from the module-level df_random / s_random /
    df_compress / s_compress collections.
    """

    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        # Recreate the scratch library before every run so each timing
        # starts from an empty store.
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        # Drop the scratch library and release the handle.
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        # Time writing the idx-th random DataFrame.
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        # Time writing the idx-th random Series.
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        # Time writing the idx-th highly-compressible DataFrame.
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        # Time writing the idx-th highly-compressible Series.
        self.lib.write('series_bench_compressible', s_compress[idx])
Exemple #2
0
    def save_to_chunkstore_per_symbol(self):
        """Rebuild the per-symbol chunk store from the close-price table.

        Pivots the raw close prices into one column per symbol and writes
        each symbol as its own date-chunked series.
        """
        lib_name = "jy_equity_mkt_data"
        arctic_store = Arctic(get_mongo_admin_conn_str())
        # Always start from a clean library.
        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store[lib_name]

        raw = self.load_all_close_price()

        # One column per symbol ("o"), indexed by timestamp ("t").
        pivoted = raw.pivot_table(values="close_price",
                                  index="t",
                                  columns="o",
                                  aggfunc=np.mean)
        pivoted.index.rename("date", inplace=True)

        for count, col in enumerate(pivoted.columns, start=1):
            per_symbol = pivoted.loc[:, col]
            per_symbol = per_symbol.dropna(axis=0)
            lib_chunk_store.write(col,
                                  per_symbol,
                                  chunker=DateChunker(),
                                  chunk_size="D")
            # Progress heartbeat every other symbol.
            if count % 2 == 0:
                print(f"{count}:{col}")
Exemple #3
0
class TimeSuiteWrite(object):
    """Benchmark suite timing Arctic VersionStore writes (asv naming).

    ``params`` indexes into the module-level TEST_SIZES; the written data
    comes from the module-level df_random / s_random / df_compress /
    s_compress collections.
    """

    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        # Recreate the scratch library so every timed run starts empty.
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        # Drop the scratch library and release the handle.
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        self.lib.write('series_bench_compressible', s_compress[idx])
Exemple #4
0
def insert_random_data(config, args, n_rows):
    """Fill a freshly (re)created benchmark library with random data.

    One synthetic one-minute dataset is written per symbol, under the
    names 'sym0', 'sym1', ...
    """
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)
    # Drop and recreate so every benchmark run starts from scratch.
    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]

    for sym_no in range(args.symbols):
        dataset = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        lib.write('sym' + str(sym_no), dataset)
Exemple #5
0
def insert_random_data(config, args, n_rows):
    """Write ``args.symbols`` random one-minute datasets into a clean library."""
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)
    store.delete_library(lib_name)  # always start from an empty library
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]

    # Symbols are named 'sym0', 'sym1', ... up to args.symbols - 1.
    for symbol in ('sym' + str(k) for k in range(args.symbols)):
        lib.write(symbol,
                  gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense))
Exemple #6
0
    def convert_mkt_history_data(self):
        """Convert historical equity market data into an Arctic chunk store."""
        arctic_store = Arctic(get_mongo_admin_conn_str())
        lib_name = "jy_chn_equity_otvn_chunkstore"

        # Always rebuild the target library from scratch.
        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store[lib_name]

        # Hard-coded date ranges for now; a more elegant expression is possible.
        periods = [(date(1990, 1, 1), date(2000, 1, 15)),
                   (date(2000, 1, 15), date(2010, 1, 15)),
                   (date(2010, 1, 15), date(2020, 1, 1))]
        for i, (period_start, period_end) in enumerate(periods):
            # Measurement showed that for daily-frequency data a chunk_size
            # of "M" gives the best combined write/read efficiency.
            self._convert_period_equity_mkt_data_to_arctic(
                period_start, period_end, lib_chunk_store, "mkt_data", "M",
                i == 0)
Exemple #7
0
class TimeSuiteAppend(object):
    """Benchmark suite timing Arctic appends (asv naming convention).

    ``params`` indexes into the module-level TEST_SIZES; ``setup`` seeds
    'test_df' so the timed call measures only the append itself.
    """

    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        # Fresh library plus an initial write so append has a base version.
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        # Drop the scratch library and release the handle.
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
Exemple #8
0
class TimeSuiteAppend(object):
    """Benchmark suite timing Arctic appends (asv naming convention).

    ``params`` indexes into the module-level TEST_SIZES; ``setup`` seeds
    'test_df' so the timed call measures only the append itself.
    """

    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        # Fresh library plus an initial write so append has a base version.
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        # Drop the scratch library and release the handle.
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
Exemple #9
0
    def remove_time_series_cache_on_disk(self,
                                         fname,
                                         engine='hdf5_fixed',
                                         db_server='127.0.0.1'):
        """Delete a cached time series from the chosen storage engine.

        Parameters
        ----------
        fname : str
            Cache name; for the arctic engine the basename is used and
            dots are replaced with underscores.
        engine : str
            'bcolz', 'arctic', or an 'hdf5_<format>' variant.
        db_server : str
            MongoDB host used by the 'arctic' engine.
        """
        # Any 'hdf5_<format>' engine collapses to plain 'hdf5' for dispatch.
        # (The original indexed engine.split('_')[1] unconditionally, which
        # raised IndexError for a bare 'hdf5', and the extracted format was
        # never used here.)
        if 'hdf5' in engine:
            engine = 'hdf5'

        if engine == 'bcolz':
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            pass
        elif engine == 'arctic':
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 10 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            c = pymongo.MongoClient(db_server, connect=False)
            try:
                store = Arctic(c,
                               socketTimeoutMS=socketTimeoutMS,
                               serverSelectionTimeoutMS=socketTimeoutMS)
                store.delete_library(fname)
            finally:
                # Close the client even if library deletion fails.
                c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif engine == 'hdf5':
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy; a missing file is fine, but don't
            # swallow unrelated exceptions with a bare except
            try:
                os.remove(h5_filename)
            except OSError:
                pass
Exemple #10
0
    def save_to_arctic_tickstore(self):
        """Write the pivoted close-price table into an Arctic tick store.

        NOTE(review): the original author flagged this as "not work";
        kept behaviorally identical.
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library("jy_otv_tickstore")
        arctic_store.initialize_library("jy_otv_tickstore",
                                        lib_type=TICK_STORE)
        lib_tick_store = arctic_store["jy_otv_tickstore"]

        raw = self.load_all_close_price()

        # One column per symbol ("o"), one row per timestamp ("t").
        pivoted = raw.pivot_table(values="close_price",
                                  index="t",
                                  columns="o",
                                  aggfunc=np.mean)

        # Make the index timezone-aware before handing it to the tick store.
        pivoted.index = pivoted.index.tz_localize("Asia/Shanghai")

        lib_tick_store.write("close_price", pivoted)
Exemple #11
0
    def save_to_chunkstore(self):
        """Write the raw close-price table into an Arctic chunk store."""
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library("jy_otv_chunkstore")
        arctic_store.initialize_library("jy_otv_chunkstore",
                                        lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store["jy_otv_chunkstore"]

        frame = self.load_all_close_price()
        # Sort by the index so chunking by date works on ordered data.
        frame.sort_index(axis=0, ascending=True, inplace=True)
        print(frame)

        # Report how long the monthly-chunked write takes.
        start = time.time()
        lib_chunk_store.write("close_price", frame, chunk_size="M")
        print(f"total write time {time.time()-start} ")
Exemple #12
0
    def remove_time_series_cache_on_disk(self, fname, engine='hdf5_fixed', db_server='127.0.0.1', db_port='6379',
                                         timeout=10, username=None,
                                         password=None):
        """Delete a cached time series from the chosen storage engine.

        Parameters
        ----------
        fname : str
            Cache key / filename. For Redis, the special value
            'flush_all_keys' clears the whole DB, and names containing
            '*' delete by pattern match.
        engine : str
            'bcolz', 'redis', 'arctic', or an 'hdf5_*' variant.
        db_server, db_port : str
            Database host/port for the redis and arctic engines.
        timeout : int
            Socket timeout in seconds for Redis.
        username, password : str or None
            Optional MongoDB credentials for the arctic engine.
        """

        # Any 'hdf5_<format>' engine collapses to plain 'hdf5' for dispatch.
        if 'hdf5' in engine:
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters to substitutes (which Bcolz can't deal with)
            pass
        elif (engine == 'redis'):
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if (fname == 'flush_all_keys'):
                    r.flushall()
                else:
                    # allow deletion of keys by pattern matching
                    if "*" in fname:
                        x = r.keys(fname)

                        if len(x) > 0:
                            # redis-py's delete takes key names as *args,
                            # not a list — passing the list raised DataError
                            r.delete(*x)

                    r.delete(fname)

            except Exception as e:
                self.logger.warning("Cannot delete non-existent key " + fname + " in Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            try:
                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                               connectTimeoutMS=socketTimeoutMS)

                store.delete_library(fname)
            finally:
                # Close the client even if library deletion fails.
                c.close()

            self.logger.info("Deleted MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # delete the old copy; ignore only a failed file removal
            # rather than swallowing every exception with a bare except
            try:
                os.remove(h5_filename)
            except OSError:
                pass
Exemple #13
0
def get_stock_history(ticker, start_date, end_date):
    """Fetch historical prices for *ticker* as a numeric DataFrame.

    Parameters
    ----------
    ticker : str
        Ticker symbol passed to ystockquote.
    start_date, end_date : str
        Date bounds, 'YYYY-MM-DD'.

    Returns
    -------
    pandas.DataFrame
        One row per date, numeric price/volume columns.
    """
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # DataFrame.convert_objects was deprecated and then removed from
    # pandas; coerce each column to numeric instead (unparseable values
    # become NaN — here all columns are price/volume strings).
    df = df.apply(pandas.to_numeric, errors="coerce")
    return df


################################################
# VersionStore: Storing and updating stock data
################################################


arctic = Arctic("localhost")

# Create a VersionStore library (drop any previous copy first so the
# example always starts from a clean slate)
arctic.delete_library("jblackburn.stocks")
arctic.initialize_library("jblackburn.stocks")
arctic.list_libraries()


stocks = arctic["jblackburn.stocks"]

# get some prices (bare expression below displays in a REPL/notebook)
aapl = get_stock_history("aapl", "2015-01-01", "2015-02-01")
aapl

# store them in the library, then read back data, metadata, and version
stocks.write("aapl", aapl, metadata={"source": "YAHOO"})
stocks.read("aapl").data["Adj Close"].plot()
stocks.read("aapl").metadata
stocks.read("aapl").version
Exemple #14
0
import os
import pandas as pd
from pymongo import MongoClient
import keyring
import ssl

if __name__ == "__main__":

    # client = MongoClient("localhost")
    client = MongoClient(keyring.get_password('atlas', 'connection_string'),
                         ssl_cert_reqs=ssl.CERT_NONE)

    a = Arctic(client)

    if a.library_exists('fund'):
        a.delete_library('fund')
    if a.library_exists('fund_adj'):
        a.delete_library('fund_adj')

    fund = a.initialize_library('fund', CHUNK_STORE)
    fund_adj = a.initialize_library('fund_adj', CHUNK_STORE)

    fund = a['fund']
    fund_adj = a['fund_adj']

    local = Arctic('localhost')

    fund_local = local['fund']
    fund_adj_local = local['fund_adj']

    for symbol in fund_local.list_symbols():
Exemple #15
0
def get_stock_history(ticker, start_date, end_date):
    """Fetch historical prices for *ticker* as a numeric DataFrame.

    Parameters
    ----------
    ticker : str
        Ticker symbol passed to ystockquote.
    start_date, end_date : str
        Date bounds, 'YYYY-MM-DD'.

    Returns
    -------
    pandas.DataFrame
        One row per date, numeric price/volume columns.
    """
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # DataFrame.convert_objects was deprecated and then removed from
    # pandas; coerce each column to numeric instead (unparseable values
    # become NaN — here all columns are price/volume strings).
    df = df.apply(pandas.to_numeric, errors="coerce")
    return df


################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic('localhost')

# Create a VersionStore library (drop any previous copy first so the
# example always starts from a clean slate)
arctic.delete_library('jblackburn.stocks')
arctic.initialize_library('jblackburn.stocks')
arctic.list_libraries()

stocks = arctic['jblackburn.stocks']

# get some prices (bare expression below displays in a REPL/notebook)
aapl = get_stock_history('aapl', '2015-01-01', '2015-02-01')
aapl

# store them in the library, then read back data, metadata, and version
stocks.write('aapl', aapl, metadata={'source': 'YAHOO'})
stocks.read('aapl').data['Adj Close'].plot()
stocks.read('aapl').metadata
stocks.read('aapl').version
Exemple #16
0
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pd.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df


################################################
# VersionStore: Storing and updating stock data
################################################



arctic = Arctic('localhost')

# Create a VersionStore library (drop any previous copy first so the
# example always starts from a clean slate)
arctic.delete_library('jblackburn.stocks')
arctic.initialize_library('jblackburn.stocks')
arctic.list_libraries()


stocks = arctic['jblackburn.stocks']

# get some prices (bare expression below displays in a REPL/notebook)
aapl = get_stock_history('aapl', '2015-01-01', '2015-02-01')
aapl

# store them in the library, then read back data, metadata, and version
stocks.write('aapl', aapl, metadata={'source': 'YAHOO'})
stocks.read('aapl').data['Adj Close'].plot()
stocks.read('aapl').metadata
stocks.read('aapl').version
class TestArcticBroker(unittest.TestCase):
    """End-to-end tests for ArcticBroker checkout / commit / history / peek.

    Fixes over the original: ``tearDown`` was defined twice (the second
    definition silently shadowed the first) — now defined once; deprecated
    ``assertEquals`` replaced with ``assertEqual``; the removed
    ``DataFrame.append`` replaced with ``pd.concat``.
    """

    def setUp(self):
        # Build a throwaway library seeded with two imported series.
        self.robot_user = "******"
        self.import_comment = "importcomment"
        self.arctic = Arctic('localhost')
        self.library_name = marc.random_library(self.arctic)
        lu.logger.info("Created test library {}".format(self.library_name))
        self.arctic.initialize_library(self.library_name)
        simple_pd = pu.create_simple_series(['a', 'b', 'c'], 5)
        lib = self.arctic[self.library_name]

        import_pandas(lib, simple_pd, "symbol",
                      RevisionInfo(who=self.robot_user,
                                   what=self.import_comment,
                                   when=datetime.datetime.now()))
        import_pandas(lib, simple_pd, "ES.SETL.EOD",
                      RevisionInfo(who=self.robot_user,
                                   what="import something else",
                                   when=datetime.datetime.now()))

    def tearDown(self):
        # Drop the per-test library created in setUp.
        self.arctic.delete_library(self.library_name)
        lu.logger.info("deleted test library {}".format(self.library_name))

    def test_checkout_and_checkin_arctic(self):
        url = "arctic:///{}/symbol".format(self.library_name)
        broker = ArcticBroker(self.arctic)
        matrix = broker.checkout(url)
        history = broker.history(url)
        self.assertEqual(1, len(history))
        self.assertEqual(self.robot_user, history[0].revision_info.who)
        self.assertEqual("1", history[0].id)
        self.assertEqual(self.import_comment, history[0].revision_info.what)
        num_rows_original_version_1 = len(matrix.content.index)

        # DataFrame.append was removed from pandas; pd.concat is the
        # equivalent row-wise concatenation.
        new_row = pd.DataFrame(
            data=np.random.randn(1, len(matrix.content.columns)),
            index=[matrix.content.index[-1] + datetime.timedelta(days=1)],
            columns=matrix.content.columns)
        df = pd.concat([matrix.content, new_row])
        revision_info = RevisionInfo(who="Jeremy Ward",
                                     what="first test commit",
                                     when=datetime.datetime(year=2000, month=1, day=13))
        broker.commit(matrix.replace_content(df), revision_info)
        hist = broker.history(url)
        self.assertEqual(2, len(hist))
        self.assertEqual(hist[1].revision_info.what, "first test commit")
        self.assertEqual("2", hist[1].id)
        matrix = broker.checkout(url)

        num_rows_original_version_2 = len(matrix.content.index)
        self.assertEqual(1, num_rows_original_version_2 - num_rows_original_version_1)
        broker.release(matrix)
        original_version = broker.history(url)[0].id
        matrix = broker.checkout(url, original_version)
        self.assertEqual(num_rows_original_version_1, len(matrix.content.index))
        broker.release(matrix)

        version_2 = broker.history(url)[1].id
        matrix = broker.checkout(url, version=version_2)
        self.assertEqual(num_rows_original_version_2, len(matrix.content.index))
        broker.release(matrix)
        self.assertEqual(2, len(broker.history(url)))

    def test_list(self):
        broker = ArcticBroker(self.arctic)
        result = broker.list()
        self.assertEqual(2, len(result))
        self.assertEqual("{}/ES/SETL/EOD".format(self.library_name), result[0].path)

    def test_peek_with_existing_file(self):
        broker = ArcticBroker(self.arctic)

        url = "arctic:///{}/symbol".format(self.library_name)
        preview = broker.peek(url)

        todays_date = datetime.datetime.now().date()

        # The fixture series spans 5 days ending around today.
        expected_start_date = todays_date - datetime.timedelta(5)
        expected_end_date = expected_start_date + datetime.timedelta(4)

        self.assertEqual(expected_start_date.strftime("%Y-%m-%d"), preview.range_start.strftime("%Y-%m-%d"))
        self.assertEqual(expected_end_date.strftime("%Y-%m-%d"), preview.range_end.strftime("%Y-%m-%d"))

    def test_peek_non_existing_file(self):
        broker = ArcticBroker(self.arctic)
        testurl = "arctic:///subdir_1/file_name_xxx.csv?format=CSV"
        preview = broker.peek(testurl)
        self.assertIsNone(preview)

    def test_with_qualified_ticker_name(self):
        url = "arctic:///{}/ES/SETL/EOD".format(self.library_name)
        broker = ArcticBroker(self.arctic)
        matrix = broker.checkout(url)
        self.assertEqual(matrix.matrix_header.path, "/{}/ES/SETL/EOD".format(self.library_name))
Exemple #18
0
# Connect to local MongoDB
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('TESTE')

# Access the library
library = store['TESTE']

# Create some data
data = pd.DataFrame([1, 2], index=[3, 4], columns=['coluna1'])

# Store the data in the library
library.write('TestData', data)

# List libraries from store
store.list_libraries()

# List symbols from library
library.list_symbols()

# Read back the data (VersionStore read returns a wrapper; .data is the frame)
data_readed = library.read('TestData').data

# Delete data from library
library.delete('TestData')

# Delete library
store.delete_library('TESTE')
def delete_lib(Arctic, lib_name):
    """Delete library *lib_name* from the given Arctic store.

    NOTE(review): the parameter name 'Arctic' shadows the Arctic class
    (PEP 8 would use a lowercase name); left unchanged to preserve
    keyword-call compatibility for existing callers.
    """
    Arctic.delete_library(lib_name)
class TestMongoBroker(unittest.TestCase):
    def setUp(self):
        """Create a random test library seeded with two imported series."""
        self.username = randomString()
        self.time_now = datetime.datetime.now()
        self.arctic = Arctic('localhost')
        self.library_name = marc.random_library(self.arctic)
        self.symbol_name = 'symbol'
        self.qualified_symbol_name = "ES.SETL.EOD"
        # Dots in a symbol name map to path separators in matrix URLs.
        self.qualified_symbol_name_as_path = self.qualified_symbol_name.replace('.','/')
        lu.logger.info("Created test library {}".format(self.library_name))
        self.arctic.initialize_library(self.library_name)
        # NOTE(review): local name 'pd' shadows the conventional pandas alias.
        pd = pu.create_simple_series(['a','b','c'] , 50)
        import_pandas(self.arctic[self.library_name],pd,self.symbol_name,RevisionInfo(who=self.username, what="init", when=self.time_now))
        import_pandas(self.arctic[self.library_name],pu.create_simple_series(['a','b','c','d'] , 75),self.qualified_symbol_name,
                      RevisionInfo(who=self.username, what="init qualified", when=self.time_now))

    def tearDown(self):
        """Drop the per-test library created in setUp."""
        self.arctic.delete_library(self.library_name)
        lu.logger.info("deleted test library {}".format(self.library_name))

    def test_acquire_data(self):
        """acquireContent returns the stored frame with a relative path."""
        method = ArcticStorageMethod(self.arctic)
        result = method.acquireContent("{}/symbol".format(self.library_name), {})
        data_frame = result.content
        self.assertFalse(result.header.path[0] == '/')
        # The original used assertIs, which only passed thanks to CPython's
        # small-int caching; assertEqual states the intended value check.
        self.assertEqual(3, len(data_frame.columns))
        self.assertIsNone(result.header.description)
        self.assertEqual(result.header.name, result.header.path)


    def test_history(self):
        """Exactly one revision exists for the freshly imported symbol."""
        method = ArcticStorageMethod(self.arctic)
        url = MatrixUrl("{}/symbol".format(self.library_name))
        result = method.history(url)
        # assertEquals is a deprecated alias; assertEqual is the real API.
        self.assertEqual(1, len(result))


    def test_acquire_data_with_qualified_symbol(self):
        """A dotted symbol name is addressable via its path-segment form."""
        method = ArcticStorageMethod(self.arctic)
        url = "{}/{}".format(self.library_name, self.qualified_symbol_name_as_path)
        result = method.acquireContent(url, {})
        data_frame = result.content
        # assertIs on ints relied on small-int caching; compare by value.
        self.assertEqual(4, len(data_frame.columns))


    def test_revision_storage(self):
        """Revisions round-trip through metadata in insertion order."""
        time_now = datetime.datetime.now()
        info = RevisionInfo(who="aPerson", what="a Change", when=time_now)

        metadata = {}
        add_revision_to_metadata(Revision("1", info), metadata)
        info2 = RevisionInfo(who="anotherPerson", what="anotherChange", when=time_now)
        add_revision_to_metadata(Revision("2", info2), metadata)

        revs = get_revisions_from_metadata(metadata)
        # assertEquals is a deprecated alias; use assertEqual throughout.
        self.assertEqual(2, len(revs))

        revision = revs[0]
        self.assertEqual("1", revision.id)
        self.assertEqual("a Change", revision.revision_info.what)
        self.assertEqual("aPerson", revision.revision_info.who)
        self.assertEqual(time_now, revision.revision_info.when)

        revision = revs[1]
        self.assertEqual("2", revision.id)
        self.assertEqual("anotherChange", revision.revision_info.what)
        self.assertEqual("anotherPerson", revision.revision_info.who)
        self.assertEqual(time_now, revision.revision_info.when)



    def test_acquire_missing_data(self):
        """Missing symbols and missing libraries raise ResourceException."""
        method = ArcticStorageMethod(self.arctic)
        # assertRaisesRegexp is the deprecated alias of assertRaisesRegex.
        with self.assertRaisesRegex(StorageMethod.ResourceException, '^ticker nosymbol not found$'):
            method.acquireContent("{}/nosymbol".format(self.library_name), {})
        with self.assertRaisesRegex(StorageMethod.ResourceException, '^library nolib not found in library list'):
            # The original called .format() on a literal with no
            # placeholders, which was a no-op.
            method.acquireContent("nolib/nosymbol", {})