def add_one_stock_daily_data_wrapper(filepath, type, exchange_place, index, date):
    pid = os.getpid()
    code = os.path.split(filepath)[-1].split(".csv")[0]
    arc = Arctic(ARCTIC_SRV)
    try:
        if index % 100 == 0:
            print("index = {}, filepath = {}".format(index, filepath))
        error_index_list = add_one_stock_daily_data(filepath, type, exchange_place, arc, date)
        if error_index_list is not None and len(error_index_list) > 0:
            f = open(
                os.path.join(
                    LOG_FILE_PATH,
                    "temp_timestamp_error_{0}_{1}_{2}.txt".format(pid, date, type)),
                "a+")
            f.write("{}, {}, {}\n".format(filepath, error_index_list, exchange_place + "_" + code))
            f.close()
    except Exception as e:
        info = traceback.format_exc()
        print("error:" + str(e))
        f = open(
            os.path.join(LOG_FILE_PATH, "temp_fail_{0}_{1}_{2}.txt".format(pid, date, type)),
            "a+")
        f.write("fail:" + str(filepath) + "\n" + str(e) + "\n" + str(info) + "\n")
        f.close()
    finally:
        arc.reset()
def __init__(self, save_to_file: bool = True, host: str = 'localhost',
             library: str = 'tick_blotter', collection: str = 'test_blotter') -> None:
    self.db = Arctic(host)
    self.db.initialize_library(library, lib_type=TICK_STORE)
    self.store = self.db[library]
    self.collection = collection
class Store:
    """
    freq in ['min', 'hour', 'day']
    """
    chunk_size = {'min': 'D', 'hour': 'D', 'day': 'M'}

    def __init__(self, library=default_library, db='localhost', **kwargs):
        self.conn = Arctic(db)

    def write(self, symbol, data, freq='min', what='TRADES'):
        lib_name = f'{what}_{freq}'
        if lib_name in self.conn.list_libraries():
            lib = self.conn[lib_name]
            lib.append(symbol, data)
        else:
            # The library must be created as a ChunkStore before the first write;
            # the chunk size is keyed by the requested frequency.
            self.conn.initialize_library(lib_name, lib_type=CHUNK_STORE)
            lib = self.conn[lib_name]
            lib.write(symbol, data, chunk_size=self.chunk_size[freq])

    def read(self, symbol, freq='min', what='TRADES', **kwargs):
        lib_name = f'{what}_{freq}'
        lib = self.conn[lib_name]
        return lib.read(symbol, **kwargs)

    def check_data_availability(self, symbol):
        pass
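# Example usage of the Store class above (a minimal sketch, not from the original
# source): assumes a local MongoDB on the default port and that `default_library`
# is defined in the surrounding module. Arctic's ChunkStore chunks by a
# datetime index/column named 'date', so the example sets one up explicitly.
import pandas as pd

df = pd.DataFrame({'price': [1.0, 2.0, 3.0]},
                  index=pd.date_range('2021-01-04 09:30', periods=3, freq='T'))
df.index.name = 'date'

s = Store()
s.write('AAPL', df, freq='min', what='TRADES')
print(s.read('AAPL', freq='min', what='TRADES'))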
def __init__(self, use_arctic=False, z_score=True, alpha=None):
    """
    Simulator constructor.

    :param use_arctic: If TRUE, Simulator creates a connection to Arctic,
        otherwise no connection is attempted
    :param z_score: If TRUE, normalize data with z-score,
        ELSE use min-max scaler
    """
    self._scaler = StandardScaler() if z_score else MinMaxScaler()
    self.cwd = os.path.dirname(os.path.realpath(__file__))
    self.ema = load_ema(alpha=alpha)
    try:
        if use_arctic:
            print('Attempting to connect to Arctic...')
            self.arctic = Arctic(MONGO_ENDPOINT)
            self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.library = self.arctic[ARCTIC_NAME]
            print('Connected to Arctic.')
        else:
            self.arctic = self.library = None
    except Exception as ex:
        self.arctic = self.library = None
        print('Unable to connect to Arctic database')
        print(ex)
def __init__(self):
    try:
        print('Attempting to connect to Arctic...')
        self.scaler = MinMaxScaler()
        self.arctic = Arctic(MONGO_ENDPOINT)
        self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.library = self.arctic[ARCTIC_NAME]
        self.reference_data_old_names = [
            'system_time', 'day', 'coinbase_midpoint'
        ]
        self.reference_data_new_names = ['t', 'd', 'm']
        self.number_of_workers = cpu_count()
        self.queue = Queue(maxsize=self.number_of_workers)
        self.return_queue = Queue(maxsize=self.number_of_workers)
        self.workers = [
            Process(name='Process-%i' % num,
                    args=(self.queue, self.return_queue),
                    target=self._do_work)
            for num in range(self.number_of_workers)
        ]
        print('Connected to Arctic.')
    except Exception as ex:
        self.arctic, self.library, self.workers = None, None, None
        print('Unable to connect to Arctic database')
        print(ex)
def __init__(self, stocks=['QQQ', 'TQQQ', 'SQQQ'], db='sqlite',
             storage_dir=None, db_file='stock_data.sqlite'):
    self.db = db
    if storage_dir is None:
        home_dir = os.path.expanduser("~")
        self.storage_dir = os.path.join(home_dir, '.yfinance_data')
        if not os.path.exists(self.storage_dir):
            os.makedirs(self.storage_dir)
    else:
        self.storage_dir = storage_dir

    if db == 'sqlite':
        self.db_file = db_file
        # 4 slashes for absolute path: https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite
        self.e = create_engine('sqlite:///{}/{}'.format(self.storage_dir, self.db_file))
        self.con = self.e.connect()
    else:
        self.e = None
        self.con = None

    if db == 'arctic':
        self.store = Arctic('localhost')
        self.store.initialize_library('yfinance_stockdata')
        self.library = self.store['yfinance_stockdata']

    self.stocks = stocks
class TickBlotter(AbstractBaseBlotter):

    def __init__(self, save_to_file: bool = True, host: str = 'localhost',
                 library: str = 'tick_blotter',
                 collection: str = 'test_blotter') -> None:
        self.db = Arctic(host)
        self.db.initialize_library(library, lib_type=TICK_STORE)
        self.store = self.db[library]
        self.collection = collection

    def write_to_file(self, data: Dict[str, Any]) -> None:
        data['index'] = pd.to_datetime(data['time'], utc=True)
        self.store.write(self.collection, [data])

    def save(self) -> None:
        data = []
        for d in self.blotter:
            d.update({'index': pd.to_datetime(d['time'], utc=True)})
            data.append(d)
        self.store.write(self.collection, data)

    def delete(self, query: Dict) -> str:
        raise NotImplementedError

    def clear(self) -> str:
        raise NotImplementedError
class Database(object):

    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = TIMEZONE
        self.exchange = exchange
        if RECORD_DATA:
            self.db = Arctic(MONGO_ENDPOINT)
            self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[ARCTIC_NAME]
            print('\n%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        """
        If RECORD_DATA is TRUE, add streaming ticks to a list.
        After the list has accumulated BATCH_SIZE ticks, insert the batch
        into the Arctic Tick Store.

        :param msg: incoming tick
        :return: void
        """
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
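# Illustrative driver for the Database class above (a sketch, not from the
# original source): assumes the module-level settings it references
# (MONGO_ENDPOINT, ARCTIC_NAME, RECORD_DATA, BATCH_SIZE, TIMEZONE) are defined.
# Each call to new_tick() buffers a message; every BATCH_SIZE messages are
# flushed to the TickStore under the symbol name.
recorder = Database(sym='BTC-USD', exchange='coinbase')
for i in range(5):
    recorder.new_tick({'price': 100.0 + i, 'size': 1})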
def getArcticLibraries():
    print(">>> Function called getArcticLibraries()")
    # Connect to local MongoDB
    store = Arctic('localhost')
    print("+ Arctic connected to MongoDB at localhost")
    print("+ Requesting libraries from Arctic store")
    return store.list_libraries()
def sync():
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']
    df = get_candle_pandas()
    print(df)
    library.write('XBTUSD', df, metadata={'source': 'Bitmex'})
def show():
    # Connect to local MongoDB
    store = Arctic('localhost')
    # Create the library - defaults to VersionStore
    store.initialize_library('Bitmex')
    # Access the library
    library = store['Bitmex']
    # library.write('XBTUSD', df, metadata={'source': 'Bitmex'})

    # Read the data
    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print(xbtusd)
    print(metadata)

    xbtusd['ret'] = -1 + xbtusd['close'] / xbtusd['close'].shift(1)
    from math import sqrt
    xbtusd['vol10'] = sqrt(260) * xbtusd['ret'].rolling(10).std(ddof=0)
    xbtusd['vol30'] = sqrt(260) * xbtusd['ret'].rolling(30).std(ddof=0)
    # print(volList)

    # plt.plot(df.index, df['close'], label='price')
    plt.plot(xbtusd.index, xbtusd['vol10'], label='vol10')
    plt.plot(xbtusd.index, xbtusd['vol30'], label='vol30')
    # plt.plot(xbtusd['ret'])
    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)
    plt.show()
def __init__(self, TYPE):
    """
    :param TYPE: 'BS', 'IS', 'CF'
    """
    ############ SETTING #############
    self.config = GetConfig()
    self.TYPE = TYPE  # 'BS', 'IS', 'CF'
    self.MONGO = self.config.MONGO
    self.CSV = self.config.CSV
    self.RAW = False
    self.outdir = self.config.fund_dir
    self.encode = self.config.encode
    self.proxypool = self.config.proxypool
    ############ CHANGE ABOVE SETTING #############

    if self.MONGO:
        from arctic import Arctic
        # mongod --dbpath D:/idwzx/project/arctic
        a = Arctic(self.config.ahost)
        a.initialize_library('ashare_{}'.format(self.TYPE))
        self.lib = a['ashare_{}'.format(self.TYPE)]

    self.result_dict = {}
class ArcticBinary:

    def __init__(self, lib_name: str = _ARCTIC_BINARY_LIBRARY, mongo_db: str = "auto"):
        """Each instance is assumed to operate on a single library.

        mongo_db:
            "auto"     pick the Google or local Mongo automatically, depending
                       on whether the environment is colab
            "google"   use the Google-hosted Mongo
            "intranet" use the Mongo in the server room
        """
        # For now, the Mongo address used by Arctic is hard-coded here.
        mongo_db_conn_str = get_mongo_admin_conn_str()
        if mongo_db == "google":
            mongo_db_conn_str = get_google_mongo_conn_str()
        elif mongo_db == "intranet":
            mongo_db_conn_str = get_intranet_mongo_conn_str()
        self._store = Arctic(mongo_db_conn_str)
        if not self._store.library_exists(lib_name):
            self._store.initialize_library(lib_name, VERSION_STORE)
        self._lib = self._store[lib_name]

    def write_bin_object(self, bin_data: bytes, symbol: str):
        self._lib.write(symbol, bin_data)

    def read_bin_object(self, symbol: str) -> bytes:
        return self._lib.read(symbol).data

    def has_symbol(self, symbol: str) -> bool:
        return self._lib.has_symbol(symbol)
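# Example usage of ArcticBinary (a sketch, not part of the original source):
# round-trips a small bytes payload through the VersionStore-backed library.
# Assumes the connection-string helpers above resolve to a reachable MongoDB,
# and that 'demo_binary' / 'demo_blob' are throwaway names.
bin_store = ArcticBinary(lib_name='demo_binary')
bin_store.write_bin_object(b'\x00\x01\x02payload', symbol='demo_blob')
assert bin_store.has_symbol('demo_blob')
assert bin_store.read_bin_object('demo_blob') == b'\x00\x01\x02payload'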
class ArcticSaver(AbstractBaseSaver):
    """
    Serializer for Arctic VersionStore.
    """

    def __init__(self, host: str = 'localhost', library: str = 'test_log',
                 note: str = '') -> None:
        """
        Library given at init, collection determined by self.name_str.
        """
        self.host = host
        self.library = library
        self.db = Arctic(host)
        self.db.initialize_library(library)
        self.store = self.db[library]
        super().__init__(note)

    def save(self, df: pd.DataFrame, what: str, contract_str: str,
             note: str = '') -> None:
        self.store.write(self.name_str(what, contract_str), df)

    def keys(self) -> List[str]:
        return self.store.list_symbols()

    def read(self, key: str) -> pd.DataFrame:
        return self.store.read(key)

    def __str__(self):
        return (f'ArcticSaver(host={self.host}, library={self.library}, '
                f'note={self.note})')
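# Example usage of ArcticSaver (illustrative only): the symbol key comes from
# name_str(), which is defined on the AbstractBaseSaver base class, so the
# exact key format depends on that code. Assumes a local MongoDB instance.
import pandas as pd

saver = ArcticSaver(library='test_log', note='demo')
frame = pd.DataFrame({'price': [101.5, 102.0]},
                     index=pd.date_range('2021-01-04', periods=2, freq='D'))
saver.save(frame, what='TRADES', contract_str='ES')
print(saver.keys())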
def test_multiprocessing_safety(mongo_host, library_name):
    # Create/initialize the library in the parent process, then spawn children
    # and start them aligned in time.
    total_processes = 64
    total_writes_per_child = 100

    register_get_auth_hook(my_auth_hook)
    global MY_ARCTIC
    MY_ARCTIC = Arctic(mongo_host=mongo_host)

    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    processes = [Process(target=f, args=(library_name, total_writes_per_child, True))
                 for _ in range(total_processes)]

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    for p in processes:
        assert p.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
def __init__(self, collection_name, database_name=None, host=None, port=None):
    database_name, host, _port_not_used = mongo_defaults(db=database_name,
                                                         host=host, port=port)

    # Arctic doesn't accept a port
    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??
    library = store[library_name]

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host
    self.store = store
    self.library_name = library_name
    self.library = library
class Database(object):

    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = tz.utc
        self.exchange = exchange
        if configs.RECORD_DATA:
            self.db = Arctic(configs.MONGO_ENDPOINT)
            self.db.initialize_library(configs.ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[configs.ARCTIC_NAME]
            print('%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % configs.BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
def __init__(self, use_arctic=False):
    """
    :param use_arctic: If True, Simulator creates a connection to Arctic,
        otherwise no connection is attempted
    """
    self._avg = None
    self._std = None
    self.cwd = os.path.dirname(os.path.realpath(__file__))
    self.z_score = lambda x: (x - self._avg) / self._std
    try:
        if use_arctic:
            print('Attempting to connect to Arctic...')
            self.arctic = Arctic(MONGO_ENDPOINT)
            self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.library = self.arctic[ARCTIC_NAME]
            print('Connected to Arctic.')
        else:
            print('Not connecting to Arctic')
            self.arctic, self.library = None, None
    except Exception as ex:
        self.arctic, self.library = None, None
        print('Unable to connect to Arctic database')
        print(ex)
def import_data(self, doc_type_l=["Tick", "Transaction", "Order"]):
    # clear all the old files
    for fp in LOG_FILE_PATH, DATA_INFO_PATH, DATA_FINISH_INFO_PATH, DATA_PATH:
        self._get_empty_folder(fp)

    arc = Arctic(ARCTIC_SRV)
    for doc_type in DOC_TYPE:
        # arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
        arc.set_quota(get_library_name(doc_type), MAX_SIZE)
    arc.reset()

    # doc_type = 'Day'
    for doc_type in doc_type_l:
        date_list = list(
            set([
                int(path.split("_")[0])
                for path in os.listdir(DATABASE_PATH) if doc_type in path
            ]))
        date_list.sort()
        date_list = [str(date) for date in date_list]

        f = open(ALL_STOCK_PATH, "r")
        stock_name_list = [lines.split("\t")[0] for lines in f.readlines()]
        f.close()

        stock_name_dict = {
            "SH": [stock_name[2:] for stock_name in stock_name_list if "SH" in stock_name],
            "SZ": [stock_name[2:] for stock_name in stock_name_list if "SZ" in stock_name],
        }

        lib_name = get_library_name(doc_type)
        a = Arctic(ARCTIC_SRV)
        # a.initialize_library(lib_name, lib_type=CHUNK_STORE)
        stock_name_exist = a[lib_name].list_symbols()
        lib = a[lib_name]
        initialize_count = 0
        for stock_name in stock_name_list:
            if stock_name not in stock_name_exist:
                initialize_count += 1
                # A placeholder for stocks
                pdf = pd.DataFrame(index=[pd.Timestamp("1900-01-01")])
                pdf.index.name = "date"  # a column named date is necessary
                lib.write(stock_name, pdf)
        print("initialize count: {}".format(initialize_count))
        print("tasks: {}".format(date_list))
        a.reset()

        # date_list = [files.split("_")[0] for files in os.listdir("./raw_data_price") if "tar" in files]
        # print(len(date_list))
        date_list = ["20201231"]  # for test
        Parallel(n_jobs=min(2, len(date_list)))(
            delayed(add_data)(date, doc_type, stock_name_dict) for date in date_list)
def open(self, store='chunkstore'):
    self.db = Arctic('localhost')
    try:
        self.store = self.db[store]
    except:
        self.db.initialize_library(store, lib_type=CHUNK_STORE)
        self.store = self.db[store]
    self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)
def __init__(self, host):
    # self.MONGOHOST = 'localhost'
    """Initializes the store here if it hasn't been already."""
    try:
        register_library_type(FunStore._LIBRARY_TYPE, FunStore)
    except Exception:
        pass
    self.store = Arctic(host)
def initialize_db(self, lib):
    if lib:
        db = Arctic('localhost')
        if lib not in db.list_libraries():
            self.logger.info(
                'Data library \'%s\' does not exist -- creating it', lib)
            db.initialize_library(lib, lib_type=TICK_STORE)
        self.dlib = lib
        self.dbconn = db[lib]
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")
    # Defaults to VersionStore
    store.initialize_library(library)
    library = store[library]
    df = pd.read_csv(filename, names=COLUMN_NAMES)
    df.time = pd.to_datetime(df.time, unit='s')
    library.write(symbol, df, metadata={'source': 'csv'})
class ArcticStoreDatabase(Database):

    def __init__(self):
        super().__init__()
        self.db = None
        self.store = None

    def open(self, store='chunkstore'):
        self.db = Arctic('localhost')
        try:
            self.store = self.db[store]
        except:
            self.db.initialize_library(store, lib_type=CHUNK_STORE)
            self.store = self.db[store]
        self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)

    def close(self):
        pass  # no need to close the arctic connection

    def remove(self, key):
        self.store.delete(key)

    # used for debugging
    def getMetadata(self, key):
        return self.store.read_metadata(key)

    def setMetadata(self, key, metadata):
        self.store.write_metadata(key, metadata)

    def _save(self, key, data):
        if self.has_key(key):
            self.store.append(key, data)
        else:
            self.store.write(key, data, chunk_size=chunkSizes.get(key, 'M'))

    def get(self, key, start=None, end=None, iterator=False):
        if not iterator:
            return self.store.read(key, chunk_range=DateRange(start, end, CLOSED_CLOSED))
        else:
            return self.store.iterator(key, chunk_range=DateRange(start, end, CLOSED_CLOSED))

    def getLatestRow(self, key):
        latestDate = self.store.read_metadata(key)['end']
        return self.get(key, start=latestDate, end=None)

    def getFirstRow(self, key):
        firstDate = self.store.read_metadata(key)['start']
        return self.get(key, start=None, end=firstDate)

    def has_key(self, key):
        return self.store.has_symbol(key)

    def list_keys(self):
        return self.store.list_symbols()
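# Example usage of ArcticStoreDatabase (a sketch under the assumption that the
# Database base class, maxDBStorage, and chunkSizes are defined elsewhere in the
# project): writes an hour of minute bars to the ChunkStore, then reads a
# closed date sub-range back.
import pandas as pd

db = ArcticStoreDatabase()
db.open('chunkstore')
bars = pd.DataFrame({'close': range(60)},
                    index=pd.date_range('2021-01-04 09:30', periods=60, freq='T'))
bars.index.name = 'date'  # ChunkStore needs a 'date' index/column
db._save('demo_bars', bars)
subset = db.get('demo_bars',
                start=pd.Timestamp('2021-01-04 09:45'),
                end=pd.Timestamp('2021-01-04 10:00'))
print(subset.head())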
def insert_random_data(config, args, n_rows):
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)

    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')

    lib = store[lib_name]

    for sym in range(args.symbols):
        df = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        lib.write('sym' + str(sym), df)
def __init__(self, lib: str, host: str = 'localhost') -> None:
    """
    Library name is whatToShow + barSize, e.g.:
        TRADES_1_min
        BID_ASK_1_hour
        MIDPOINT_30_secs
    """
    lib = lib.replace(' ', '_')
    self.db = Arctic(host)
    self.db.initialize_library(lib)
    self.store = self.db[lib]
def _arctic_loader():
    host = Arctic(arctic_opts['host'])
    lib = host.get_library(arctic_opts['library'])
    read_kwargs = {}
    start, end = map(arctic_opts.get, ['start', 'end'])
    if start and end:
        read_kwargs['chunk_range'] = pd.date_range(start, end)
    data = lib.read(arctic_opts['node'], **read_kwargs)
    if isinstance(data, VersionedItem):
        data = data.data
    return data
def setUp(self):
    self.robot_user = "******"
    self.import_comment = "importcomment"
    self.arctic = Arctic('localhost')
    self.library_name = marc.random_library(self.arctic)
    lu.logger.info("Created test library {}".format(self.library_name))
    self.arctic.initialize_library(self.library_name)
    simple_pd = pu.create_simple_series(['a', 'b', 'c'], 5)
    lib = self.arctic[self.library_name]
    import_pandas(lib, simple_pd, "symbol",
                  RevisionInfo(who=self.robot_user, what=self.import_comment,
                               when=datetime.datetime.now()))
    import_pandas(lib, simple_pd, "ES.SETL.EOD",
                  RevisionInfo(who=self.robot_user, what="import something else",
                               when=datetime.datetime.now()))
def _get_lib(lib_name: "lib name (str)" = "default",
             lib_type: "lib type" = VERSION_STORE):
    client = MongoClient(host=config.MONGO_HOST, port=27017,
                         username=config.MONGO_USER,
                         password=config.MONGO_PWD,
                         authSource=config.MONGO_AUTHDB)
    a = Arctic(client)
    if not a.library_exists(lib_name):
        a.initialize_library(lib_name, lib_type=lib_type)
    return a[lib_name]
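# Example call to _get_lib (illustrative; assumes the config module above holds
# valid MongoDB credentials, and 'daily_prices' / '000001.SZ' are placeholder
# names). VERSION_STORE is Arctic's default library type, so the second
# argument can usually be omitted.
import pandas as pd

frame = pd.DataFrame({'close': [10.1, 10.3]},
                     index=pd.date_range('2021-01-04', periods=2))
daily_lib = _get_lib("daily_prices")
daily_lib.write("000001.SZ", frame)
print(daily_lib.read("000001.SZ").data)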
def init_db_connection(self):
    """
    Initiate database connection.

    :return: (void)
    """
    print("init_db_connection for {}...".format(self.sym))
    try:
        self.db = Arctic(MONGO_ENDPOINT)
        self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.collection = self.db[ARCTIC_NAME]
    except PyMongoError as e:
        print("Database.PyMongoError() --> {}".format(e))
def startDB(self, storename='fx'):
    self.con = subprocess.Popen("%s %s %s" % (self.mongod, "--dbpath", self.dbPath),
                                shell=True)
    self.store = Arctic(self.address)
    if not self.store.library_exists(storename):
        self.store.initialize_library(storename)
    self.library = self.store[storename]
    self.rc = self.con.returncode
def read_from_db(target_building, start_time=None, end_time=None):
    '''
    Load the data for the target building from the DB.

    return: {point name: data}
    data is in pandas.DataFrame format with two columns ['date', 'data']
    '''
    if isinstance(start_time, arrow.Arrow):
        start_time = start_time.datetime
    elif isinstance(start_time, (dt, date)):
        pass
    elif start_time is None:
        pass
    else:
        raise ValueError('the type of time value is unknown: {0}'.format(type(start_time)))

    if isinstance(end_time, arrow.Arrow):
        end_time = end_time.datetime
    elif isinstance(end_time, (dt, date)):
        pass
    elif end_time is None:
        pass
    else:
        raise ValueError('the type of time value is unknown: {0}'.format(type(end_time)))

    if start_time and end_time:
        date_range = DateRange(start=start_time, end=end_time)
    else:
        date_range = None

    print('loading timeseries data from db for %s...' % target_building)
    conn = Arctic('localhost')
    if target_building not in conn.list_libraries():
        raise ValueError('%s not found in the DB!' % target_building)
    else:
        lib = conn[target_building]
        srcids = lib.list_symbols()
        res = {}
        for srcid in srcids:
            data = lib.read(srcid, chunk_range=date_range)
            if len(data) == 0:
                print('WARNING: {0} has empty data.'.format(srcid))
                # pdb.set_trace()
                continue
            res[srcid] = data
        print('correctly done')
        return res
def main():
    parser = argparse.ArgumentParser(prog="store", description='Store data to DB')
    parser.add_argument('--host', help="MongoDB host", default=MONGO_HOST_DEFAULT, type=str)
    parser.add_argument('--updater', help="Updater", default=UPDATER_DEFAULT, type=str)
    parser.add_argument('-s', '--source', help="Source", default=SOURCE_DEFAULT, type=str)
    parser.add_argument('--symbols', help="Symbols", default=SYMBOLS_DEFAULT, type=str)
    parser.add_argument('--start', help="Start date", default='', type=str)
    parser.add_argument('--end', help="End date", default='', type=str)
    parser.add_argument('--freq', help="Freq", default='', type=str)
    parser.add_argument('--max_rows', help="max_rows", default=10, type=int)
    parser.add_argument('--max_columns', help="max_columns", default=6, type=int)
    parser.add_argument('--api_key', help="API key", default='', type=str)
    parser.add_argument('--expire_after',
                        help="Cache expiration ('0': no cache, '-1': no expiration, "
                             "'HH:MM:SS.X': expiration duration)",
                        default='24:00:00.0', type=str)
    args = parser.parse_args()

    pd.set_option('max_rows', args.max_rows)
    pd.set_option('expand_frame_repr', False)
    pd.set_option('max_columns', args.max_columns)

    if args.start != '':
        start = pd.to_datetime(args.start)
    else:
        start = None

    if args.end != '':
        end = pd.to_datetime(args.end)
    else:
        end = None

    if args.freq != '':
        freq = args.freq
    else:
        freq = None

    symbols = args.symbols.split(',')

    session = get_session(args.expire_after, 'cache')
    my_updater = updater(args.updater, session=session)
    if args.api_key != '':
        my_updater.set_credentials(api_key=args.api_key)

    store = Arctic(args.host)
    library_name = my_updater.library_name(args.source, freq)
    print(library_name)
    store.initialize_library(library_name)
    library = store[library_name]

    for symbol in symbols:
        update(library, my_updater, symbol, start, end, freq, args.source.lower())
def __init__(self, database_name, collection_name, host=DEFAULT_MONGO_HOST):
    if database_name is None:
        database_name = DEFAULT_DB

    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??
    library = store[library_name]

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host
    self.store = store
    self.library_name = library_name
    self.library = library
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    # Create Arctic and directly fork/start the children (no wait)
    total_iterations = 12
    total_processes = 6
    total_writes_per_child = 20

    global MY_ARCTIC

    for i in range(total_iterations):
        processes = list()

        MY_ARCTIC = Arctic(mongo_host=mongo_host)
        for j in range(total_processes):
            p = Process(target=f, args=(library_name, total_writes_per_child, False))
            p.start()  # start directly, don't wait to create all children first
            processes.append(p)

        MY_ARCTIC.initialize_library(library_name, VERSION_STORE)  # this will unblock spinning children

        for p in processes:
            p.join()

        for p in processes:
            assert p.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
class TimeSuiteWrite(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        self.lib.write('series_bench_compressible', s_compress[idx])
class TimeSuiteAppend(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']
        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
def test1(self):
    self.assertEqual(
        Arctic.get_min_elec_dist([[0, 0], [1, 0], [1, 1], [1, 2], [0, 2]]), 1.00)
def test2(self):
    self.assertEqual(
        Arctic.get_min_elec_dist([[1.0, 1.0], [30.91, 8], [4.0, 7.64],
                                  [21.12, 6.0], [11.39, 3.0], [5.31, 11.0]]),
        10.18,
    )
def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pd.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df

################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic('localhost')

# Create a VersionStore library
arctic.delete_library('jblackburn.stocks')
arctic.initialize_library('jblackburn.stocks')
arctic.list_libraries()

stocks = arctic['jblackburn.stocks']

# get some prices
aapl = get_stock_history('aapl', '2015-01-01', '2015-02-01')
aapl

# store them in the library
stocks.write('aapl', aapl, metadata={'source': 'YAHOO'})
def setup(self, arg):
    self.store = Arctic("127.0.0.1")
    self.store.delete_library('test.lib')
    self.store.initialize_library('test.lib')
    self.lib = self.store['test.lib']
def __init__(self):
    self.store = Arctic("127.0.0.1")
params["start"] += 200 return price_list from arctic import Arctic def download_daily_bars(instrument, start, end): bars = get_historical_price(instrument,start,end) dump = json.dumps(bars, ensure_ascii=False).encode('utf-8') df = pd.read_json(dump) print(df.head()) if df.empty: return df df = df.set_index('Date') return df store = Arctic('localhost') if 'KRX_G' not in store.list_libraries(): store.initialize_library('KRX_G') lib = store['KRX_G'] krx = pd.read_csv('krx_market_symbols.csv', dtype=object) stocks = [x for x in krx['code_google']] print(len(stocks), " symbols") begin = datetime.date(2000,1,1) end = datetime.date(2016,5,30) missing = ['KRX:152550'] #for i, stock in enumerate(stocks[160:769]): for i, stock in enumerate(missing): print("%d th code=%s" % (i, stock))
pd.set_option('max_rows', 10)
pd.set_option('expand_frame_repr', False)
pd.set_option('max_columns', 6)

from arctic_updater.updaters.truefx import TrueFXUpdater

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

my_updater = TrueFXUpdater()
symbol = 'EURUSD'
year, month = 2015, 11
%time df = my_updater._read_one_month(symbol, year, month)

# Arctic (MongoDB)
from arctic import Arctic
store = Arctic('localhost')
library_name = 'test'
store.initialize_library(library_name)
library = store[library_name]
%time library.write(symbol, df)
%time df_retrieved = library.read(symbol).data

# HDF5
filename = my_updater._filename(symbol, year, month, '.h5')
%time df.to_hdf(filename, "data", mode='w', complevel=0, complib='zlib', format='table')
%time df_retrieved = pd.read_hdf(filename)

# Make a unique index
# http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154
df = df.reset_index()
from arctic import Arctic

# Connect to local MongoDB
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('NASDAQ')

# Access the library
library = store['NASDAQ']

# Load some data - maybe from Quandl
import quandl
quandl.ApiConfig.api_key = "Cxzxjy2eHKXgwMjVFhbH"
aapl = quandl.get("GOOG/NASDAQ_AAPL")

# Store the data in the library
library.write('AAPL', aapl, metadata={'source': 'Quandl'})

# Reading the data
item = library.read('AAPL')
aapl = item.data
metadata = item.metadata
class MongoDB(object):
    # To start the server, cd to "C:\Program Files\MongoDB\Server\4.0\bin",
    # then use mongod --dbpath "path to db" when starting the database server
    # to point to the correct save location.
    # Use dataframe.to_dict('records') to convert to dicts to insert into the DB.
    # The DB is stored in the MongoDB software.
    # Use Arctic from AHL to store time series data. It uses a MultiIndex
    # pd.DataFrame to store, and other data goes into metadata as a dict.
    # It will open its own server, so no need to run one separately.
    # https://www.mongodb.com/json-and-bson
    # https://stackoverflow.com/questions/20796714/how-do-i-start-mongo-db-from-windows
    # To back up the database, navigate to the bin folder of MongoDB, then run
    # "mongodump --out ..." where ... is the output folder for the dump.
    # To load into another base, navigate to the bin folder again and use
    # "mongorestore --drop ..." where ... is the folder where the database
    # backup folders were dumped.
    # MUST RUN CMD AS ADMINISTRATOR!

    def __init__(self):
        self.conn = Arctic("127.0.0.1")

    def verify_library(self, library):
        """Method to test if a library exists in the database."""
        libs = self.conn.list_libraries()
        if library in libs:
            return
        # if the library doesn't exist, alert and ask if we should create it
        else:
            print('"%s" library does not exist in server data path' % library)
            create = input('create "%s"? (y, n)' % library)
            if create.lower() == 'y':
                self.conn.initialize_library(library)
            else:
                return

    @staticmethod
    def getMultiIndex(data, index, columns=None):
        """Method to convert a dataframe to MultiIndex.

        data: pd.DataFrame
            data to be MultiIndexed
        index: list
            list of strings containing names of columns to be used as the
            MultiIndex (in order, left to right)
        columns: optional, not currently used
        """
        data = data.set_index(index)
        return data

    @timeMe
    def save(self, data, library, meta_data={}, append=True):
        """Method to save to a MongoDB library using the Arctic database.

        Parameters
        ----------
        data: pd.DataFrame (MultiIndex)
            DataFrame containing data for the database in MultiIndex structure.
            The outer index must be the index used for the Arctic "symbol"
            in the "library".
        library: str
            which library to write to
        meta_data: dict
            Dictionary of metadata values to include in the save. If not
            provided, will maintain the existing metadata in the database or
            set it to None. Keys match "symbol" from the data's outer index.
        append: bool
            True will append data to existing data for each "symbol" in the database.
            False will replace the data entirely.
        """
        self.verify_library(library)
        if isinstance(data, pd.DataFrame):
            # get unique symbols to write
            indicies = list(data.index.unique())
            # take the first index level as the "symbol" for the database
            symbols = indicies if isinstance(indicies[0], str) else np.unique([x[0] for x in indicies])
            symbol_list = self.conn[library].list_symbols()
            # fill in blank metadata for symbols not in the metadata dict
            noMeta = np.array(symbols)[~np.in1d(symbols, list(meta_data.keys()))]
            meta_data.update({x: None for x in noMeta})
            for sym in symbols:
                data_cut = data.loc[sym]
                data_cut = pd.DataFrame(data_cut).T if isinstance(data_cut, pd.Series) else data_cut
                if sym in symbol_list:
                    # get the current data for the symbol
                    db = self.conn[library].read(sym)
                    db_data = db.data
                    # if metadata is present in the database but is None in the
                    # metadata fed in, then take it from the database
                    meta_data[sym] = db.metadata
                # if the symbol is already in the database and we want to "update"
                # the series rather than simply replace it
                if sym in symbol_list and append:
                    data_post = pd.concat([db_data, data_cut], axis=0)
                    # remove duplicates and keep the latest data
                    if len(data_post.index.unique()) == 1:
                        data_post = data_post.drop_duplicates(keep='last')
                    else:
                        data_post = data_post.groupby(data_post.index).last()
                else:
                    data_post = data_cut
                # sort the data by the indices
                data_post = data_post.sort_index()
                self.conn[library].write(sym, data_post, metadata=meta_data[sym])

    @timeMe
    def read(self, library, symbols=None):
        """Method to read from a MongoDB library using the Arctic database.

        Parameters
        ----------
        library: str
            which library to read from
        symbols: list
            list of "symbols" in "library" to read
        """
        if symbols is None:
            symbols = self.conn[library].list_symbols()
        data = {}
        for sym in symbols:
            db = self.conn[library].read(sym)
            data[sym] = {}
            data[sym]['data'] = db.data
            if db.metadata is not None:
                data[sym].update(db.metadata)
        return data
def __init__(self):
    self.conn = Arctic("127.0.0.1")
# Loading data
################################################

def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df

################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic('localhost')
lst = arctic.list_libraries()
if 'KRX' not in lst:
    arctic.initialize_library('KRX')

#################################
# Dealing with lots of data
#################################

# KRX library
lib = arctic['KRX']

def load_all_stock_history_KRX():
    #
#
# Arctic Key-Value store
#

from datetime import datetime as dt
import pandas as pd
from arctic import Arctic

# Connect to the mongo-host / cluster
store = Arctic(mongo_host)

# Data is grouped into 'libraries'.
# Users may have one or more named libraries:
store.list_libraries()

# Create a library
store.initialize_library('username.scratch')

# Get a library
# library = m['username.<library>']
library = store['username.scratch']

# Store some data in the library
df = pd.DataFrame({'prices': [1, 2, 3]},
                  [dt(2014, 1, 1), dt(2014, 1, 2), dt(2014, 1, 3)])
library.write('SYMBOL', df)

# Read some data from the library
# (Note the returned object has an associated version number and metadata.)
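# A likely continuation of the read step above (a sketch based on Arctic's
# public VersionStore API; not part of the original snippet):
item = library.read('SYMBOL')
item.data       # the stored DataFrame
item.metadata   # any metadata stored alongside it
item.version    # the version number of this write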
        self._arctic_lib.check_quota()
        self._collection.insert_one(to_store)

    @mongo_retry
    def delete(self, query):
        """
        Simple delete method
        """
        self._collection.delete_one(query)


# Hook the class in for the type string 'CustomArcticLibType'
register_library_type(CustomArcticLibType._LIBRARY_TYPE, CustomArcticLibType)

# Create an Arctic instance pointed at a mongo host
store = Arctic(mongo_host)

### Initialize the library
# Map username.custom_lib -> CustomArcticLibType
store.initialize_library("username.custom_lib", CustomArcticLibType._LIBRARY_TYPE)

# Now pull our username.custom_lib; note that it has the:
#   - query(...)
#   - store(...)
#   - delete(...)
# API we defined above
lib = store["username.custom_lib"]

# Store some items in the custom library type
lib.store(Stuff("thing", dt(2012, 1, 1), object()))