Example #1
def add_one_stock_daily_data_wrapper(filepath, type, exchange_place, index,
                                     date):
    pid = os.getpid()
    code = os.path.split(filepath)[-1].split(".csv")[0]
    arc = Arctic(ARCTIC_SRV)
    try:
        if index % 100 == 0:
            print("index = {}, filepath = {}".format(index, filepath))
        error_index_list = add_one_stock_daily_data(filepath, type,
                                                    exchange_place, arc, date)
        if error_index_list is not None and len(error_index_list) > 0:
            with open(
                    os.path.join(
                        LOG_FILE_PATH,
                        "temp_timestamp_error_{0}_{1}_{2}.txt".format(
                            pid, date, type)), "a+") as f:
                f.write("{}, {}, {}\n".format(filepath, error_index_list,
                                              exchange_place + "_" + code))

    except Exception as e:
        info = traceback.format_exc()
        print("error:" + str(e))
        with open(
                os.path.join(LOG_FILE_PATH,
                             "temp_fail_{0}_{1}_{2}.txt".format(
                                 pid, date, type)), "a+") as f:
            f.write("fail:" + str(filepath) + "\n" + str(e) + "\n" +
                    str(info) + "\n")

    finally:
        arc.reset()
Example #2
 def __init__(self, save_to_file: bool = True, host: str = 'localhost',
              library: str = 'tick_blotter',
              collection: str = 'test_blotter') -> None:
     self.db = Arctic(host)
     self.db.initialize_library(library, lib_type=TICK_STORE)
     self.store = self.db[library]
     self.collection = collection
Example #3
class Store:
    """
    freq in ['min', 'hour', 'day']
    """
    chunk_size = {'min': 'D', 'hour': 'D', 'day': 'M'}

    def __init__(self, library=default_library, db='localhost', **kwargs):
        self.conn = Arctic(db)

    def write(self, symbol, data, freq='min', what='TRADES'):
        lib_name = f'{what}_{freq}'
        if lib_name in self.conn.list_libraries():
            lib = self.conn[lib_name]
            lib.append(symbol, data)
        else:
            # lib_type belongs to initialize_library, not to write(), and
            # chunk_size must be looked up by the freq argument, not 'freq'
            self.conn.initialize_library(lib_name, lib_type=CHUNK_STORE)
            lib = self.conn[lib_name]
            lib.write(symbol, data, chunk_size=self.chunk_size[freq])

    def read(self, symbol, freq='min', what='TRADES', **kwargs):
        lib_name = f'{what}_{freq}'
        lib = self.conn[lib_name]
        return lib.read(symbol, **kwargs)

    def check_data_availability(
        self,
        symbol,
    ):
        pass
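A minimal usage sketch of the Store class above, assuming a running local MongoDB, a module-level default_library, and a DataFrame whose index is named 'date' as ChunkStore expects:

import pandas as pd

# hypothetical one-minute bars; ChunkStore chunks rows by the 'date' index
bars = pd.DataFrame(
    {'price': [100.0, 100.5]},
    index=pd.DatetimeIndex(['2021-01-04 09:30', '2021-01-04 09:31'],
                           name='date'))

store = Store()
store.write('AAPL', bars, freq='min', what='TRADES')
print(store.read('AAPL', freq='min', what='TRADES'))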
Example #4
 def __init__(self, use_arctic=False, z_score=True, alpha=None):
     """
     Simulator constructor
     :param use_arctic: If TRUE, Simulator creates a connection to Arctic,
                         Otherwise, no connection is attempted
     :param z_score: If TRUE, normalize data with z-score,
                     ELSE use min-max scaler
     """
     self._scaler = StandardScaler() if z_score else MinMaxScaler()
     self.cwd = os.path.dirname(os.path.realpath(__file__))
     self.ema = load_ema(alpha=alpha)
     try:
         if use_arctic:
             print('Attempting to connect to Arctic...')
             self.arctic = Arctic(MONGO_ENDPOINT)
             self.arctic.initialize_library(ARCTIC_NAME,
                                            lib_type=TICK_STORE)
             self.library = self.arctic[ARCTIC_NAME]
             print('Connected to Arctic.')
         else:
             self.arctic = self.library = None
     except Exception as ex:
         self.arctic = self.library = None
         print('Unable to connect to Arctic database')
         print(ex)
Example #5
 def __init__(self):
     try:
         print('Attempting to connect to Arctic...')
         self.scaler = MinMaxScaler()
         self.arctic = Arctic(MONGO_ENDPOINT)
         self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
         self.library = self.arctic[ARCTIC_NAME]
         self.reference_data_old_names = [
             'system_time', 'day', 'coinbase_midpoint'
         ]
         self.reference_data_new_names = ['t', 'd', 'm']
         self.number_of_workers = cpu_count()
         self.queue = Queue(maxsize=self.number_of_workers)
         self.return_queue = Queue(maxsize=self.number_of_workers)
         self.workers = [
             Process(name='Process-%i' % num,
                     args=(self.queue, self.return_queue),
                     target=self._do_work)
             for num in range(self.number_of_workers)
         ]
         print('Connected to Arctic.')
     except Exception as ex:
         self.arctic, self.library, self.workers = None, None, None
         print('Unable to connect to Arctic database')
         print(ex)
Example #6
    def __init__(self, stocks=['QQQ', 'TQQQ', 'SQQQ'], db='sqlite', storage_dir=None, db_file='stock_data.sqlite'):
        self.db = db
        if storage_dir is None:
            home_dir = os.path.expanduser("~")
            self.storage_dir = os.path.join(home_dir, '.yfinance_data')
            if not os.path.exists(self.storage_dir):
                os.makedirs(self.storage_dir)
        else:
            self.storage_dir = storage_dir

        if db == 'sqlite':
            self.db_file = db_file
            # 4 slashes for absolute path: https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite
            self.e = create_engine('sqlite:///{}/{}'.format(self.storage_dir, self.db_file))
            self.con = self.e.connect()
        else:
            self.e = None
            self.con = None

        if db == 'arctic':
            self.store = Arctic('localhost')
            self.store.initialize_library('yfinance_stockdata')
            self.library = self.store['yfinance_stockdata']

        self.stocks = stocks
Example #7
class TickBlotter(AbstractBaseBlotter):
    def __init__(self, save_to_file: bool = True, host: str = 'localhost',
                 library: str = 'tick_blotter',
                 collection: str = 'test_blotter') -> None:
        self.db = Arctic(host)
        self.db.initialize_library(library, lib_type=TICK_STORE)
        self.store = self.db[library]
        self.collection = collection

    def write_to_file(self, data: Dict[str, Any]) -> None:
        data['index'] = pd.to_datetime(data['time'], utc=True)
        self.store.write(self.collection, [data])

    def save(self) -> None:
        data = []
        for d in self.blotter:
            d.update({'index': pd.to_datetime(d['time'], utc=True)})
            data.append(d)
        self.store.write(self.collection, data)

    def delete(self, query: Dict) -> str:
        raise NotImplementedError

    def clear(self) -> str:
        raise NotImplementedError
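A short sketch of pushing one record through TickBlotter; the trade fields are illustrative, and the 'index' timestamp that TickStore requires is added by write_to_file:

blotter = TickBlotter(save_to_file=False)
blotter.write_to_file({'time': '2021-01-04 14:30:00', 'symbol': 'ES',
                       'price': 3700.25, 'qty': 1})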
Example #8
class Database(object):
    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = TIMEZONE
        self.exchange = exchange
        if RECORD_DATA:
            self.db = Arctic(MONGO_ENDPOINT)
            self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[ARCTIC_NAME]
            print('\n%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        """
        If RECORD_DATA is TRUE, add streaming ticks to a list
        After the list has accumulated BATCH_SIZE ticks, insert batch into
        the Arctic Tick Store
        :param msg: incoming tick
        :return: void
        """
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
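A hedged sketch of driving the Database class above; MONGO_ENDPOINT, ARCTIC_NAME, TIMEZONE, RECORD_DATA and BATCH_SIZE are module-level settings the class assumes, and the tick fields are illustrative:

db = Database(sym='BTC-USD', exchange='coinbase')
for price in (100.0, 100.5, 101.0):
    # new_tick() stamps each msg with an 'index' timestamp, buffers it, and
    # flushes the buffer to the Arctic tick store every BATCH_SIZE messages
    db.new_tick({'price': price, 'side': 'buy'})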
Example #9
def getArcticLibraries():
    print(">>> Function called getArcticLibraries()")
    # Connect to local MongoDB
    store = Arctic('localhost')
    print("+ Arctic connected to MongoDB at localhost")
    print("+ Requesting libraries from Arctic store")
    return store.list_libraries()
Example #10
def sync():
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']
    df = get_candle_pandas()
    print(df)
    library.write('XBTUSD', df, metadata={'source': 'Bitmex'})
Example #11
def show():
    # Connect to local MongoDB
    store = Arctic('localhost')
    # Create the library - defaults to VersionStore
    store.initialize_library('Bitmex')
    # Access the library
    library = store['Bitmex']
    #library.write('XBTUSD', df, metadata={'source': 'Bitmex'})

    # Reading the data
    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print(xbtusd)
    print(metadata)
    
    # simple returns, then annualized rolling volatility (sqrt(260) trading days)
    xbtusd['ret'] = -1 + xbtusd['close'] / xbtusd['close'].shift(1)

    from math import sqrt
    xbtusd['vol10'] = sqrt(260) * xbtusd['ret'].rolling(10).std(ddof=0)
    xbtusd['vol30'] = sqrt(260) * xbtusd['ret'].rolling(30).std(ddof=0)

    #print (volList)

    #plt.plot(df.index, df['close'], label='price')
    plt.plot(xbtusd.index, xbtusd['vol10'], label='vol10')
    plt.plot(xbtusd.index, xbtusd['vol30'], label='vol30')
    #plt.plot(xbtusd['ret'])
    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)

    plt.show()
Example #12
    def __init__(self, TYPE):
        """
        :param TYPE: 'BS', 'IS', 'CF'
        """

        ############ SETTING #############
        self.config = GetConfig()
        self.TYPE = TYPE  # 'BS', 'IS', 'CF'
        self.MONGO = self.config.MONGO
        self.CSV = self.config.CSV
        self.RAW = False
        self.outdir = self.config.fund_dir
        self.encode = self.config.encode
        self.proxypool = self.config.proxypool

        ############ CHANGE ABOVE SETTING #############

        if self.MONGO:
            from arctic import Arctic
            # mongod --dbpath D:/idwzx/project/arctic
            a = Arctic(self.config.ahost)
            a.initialize_library('ashare_{}'.format(self.TYPE))
            self.lib = a['ashare_{}'.format(self.TYPE)]

        self.result_dict = {}
Example #13
class ArcticBinary:
    def __init__(self,
                 lib_name: str = _ARCTIC_BINARY_LIBRARY,
                 mongo_db: str = "auto"):
        """假定一个 instance 只操作一个 library
            mongo_db :
                "auto" 根据环境是否为 colab 自动选择  google 还是 local
                "google" 选择 google 的 mongo
                "intranet" 选择机房中的Mongo
        """
        # 这里 暂时先 hardcode arctic 所使用的 mongo 地址
        mongo_db_conn_str = get_mongo_admin_conn_str()
        if mongo_db == "google":
            mongo_db_conn_str = get_google_mongo_conn_str()
        elif mongo_db == "intranet":
            mongo_db_conn_str = get_intranet_mongo_conn_str()
        self._store = Arctic(mongo_db_conn_str)
        if not self._store.library_exists(lib_name):
            self._store.initialize_library(lib_name, VERSION_STORE)
        self._lib = self._store[lib_name]

    def write_bin_object(self, bin_data: bytes, symbol: str):
        self._lib.write(symbol, bin_data)

    def read_bin_object(self, symbol: str) -> bytes:
        return self._lib.read(symbol).data

    def has_symbol(self, symbol: str) -> bytes:
        return self._lib.has_symbol(symbol)
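A quick usage sketch for ArcticBinary, assuming the connection-string helpers above resolve to a reachable MongoDB; the symbol name is hypothetical:

ab = ArcticBinary()
ab.write_bin_object(b'\x00\x01\x02', symbol='model_blob')  # hypothetical symbol
if ab.has_symbol('model_blob'):
    assert ab.read_bin_object('model_blob') == b'\x00\x01\x02'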
Example #14
class ArcticSaver(AbstractBaseSaver):
    """
    Serializer for Arctic VersionStore.
    """
    def __init__(self,
                 host: str = 'localhost',
                 library: str = 'test_log',
                 note: str = '') -> None:
        """
        Library given at init, collection determined by self.name_str.
        """
        self.host = host
        self.library = library
        self.db = Arctic(host)
        self.db.initialize_library(library)
        self.store = self.db[library]
        super().__init__(note)

    def save(self,
             df: pd.DataFrame,
             what: str,
             contract_str: str,
             note: str = '') -> None:
        self.store.write(self.name_str(what, contract_str), df)

    def keys(self) -> List[str]:
        return self.store.list_symbols()

    def read(self, key: str) -> pd.DataFrame:
        # VersionStore.read returns a VersionedItem; unwrap the DataFrame
        return self.store.read(key).data

    def __str__(self):
        return (f'ArcticSaver(host={self.host}, library={self.library}, '
                f'note={self.note})')
Example #15
def test_multiprocessing_safety(mongo_host, library_name):
    # Create/initialize library at the parent process, then spawn children, and start them aligned in time
    total_processes = 64
    total_writes_per_child = 100

    register_get_auth_hook(my_auth_hook)

    global MY_ARCTIC
    MY_ARCTIC = Arctic(mongo_host=mongo_host)

    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    processes = [Process(target=f, args=(library_name, total_writes_per_child, True)) for _ in range(total_processes)]

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    for p in processes:
        assert p.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Example #16
    def __init__(self,
                 collection_name,
                 database_name=None,
                 host=None,
                 port=None):

        database_name, host, _port_not_used = mongo_defaults(db=database_name,
                                                             host=host,
                                                             port=port)

        # Arctic doesn't accept a port

        store = Arctic(host)
        library_name = database_name + "." + collection_name
        store.initialize_library(
            library_name)  # will this fail if already exists??
        library = store[library_name]

        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host

        self.store = store
        self.library_name = library_name
        self.library = library
Example #17
class Database(object):
    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = tz.utc
        self.exchange = exchange
        if configs.RECORD_DATA:
            self.db = Arctic(configs.MONGO_ENDPOINT)
            self.db.initialize_library(configs.ARCTIC_NAME,
                                       lib_type=TICK_STORE)
            self.collection = self.db[configs.ARCTIC_NAME]
            print('%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % configs.BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
Example #18
    def __init__(self, use_arctic=False):
        """

        :param use_arctic: If True, Simulator creates a connection to Arctic,
                            Otherwise, no connection is attempted
        """
        self._avg = None
        self._std = None
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.z_score = lambda x: (x - self._avg) / self._std
        try:
            if use_arctic:
                print('Attempting to connect to Arctic...')
                self.arctic = Arctic(MONGO_ENDPOINT)
                self.arctic.initialize_library(ARCTIC_NAME,
                                               lib_type=TICK_STORE)
                self.library = self.arctic[ARCTIC_NAME]
                print('Connected to Arctic.')
            else:
                print('Not connecting to Arctic')
                self.arctic, self.library = None, None
        except Exception as ex:
            self.arctic, self.library = None, None
            print('Unable to connect to Arctic database')
            print(ex)
Example #19
    def import_data(self, doc_type_l=["Tick", "Transaction", "Order"]):
        # clear all the old files
        for fp in LOG_FILE_PATH, DATA_INFO_PATH, DATA_FINISH_INFO_PATH, DATA_PATH:
            self._get_empty_folder(fp)

        arc = Arctic(ARCTIC_SRV)
        for doc_type in DOC_TYPE:
            # arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
            arc.set_quota(get_library_name(doc_type), MAX_SIZE)
        arc.reset()

        # doc_type = 'Day'
        for doc_type in doc_type_l:
            date_list = list(
                set([
                    int(path.split("_")[0])
                    for path in os.listdir(DATABASE_PATH) if doc_type in path
                ]))
            date_list.sort()
            date_list = [str(date) for date in date_list]

            f = open(ALL_STOCK_PATH, "r")
            stock_name_list = [lines.split("\t")[0] for lines in f.readlines()]
            f.close()
            stock_name_dict = {
                "SH": [
                    stock_name[2:] for stock_name in stock_name_list
                    if "SH" in stock_name
                ],
                "SZ": [
                    stock_name[2:] for stock_name in stock_name_list
                    if "SZ" in stock_name
                ],
            }

            lib_name = get_library_name(doc_type)
            a = Arctic(ARCTIC_SRV)
            # a.initialize_library(lib_name, lib_type=CHUNK_STORE)

            stock_name_exist = a[lib_name].list_symbols()
            lib = a[lib_name]
            initialize_count = 0
            for stock_name in stock_name_list:
                if stock_name not in stock_name_exist:
                    initialize_count += 1
                    # A placeholder for stocks
                    pdf = pd.DataFrame(index=[pd.Timestamp("1900-01-01")])
                    pdf.index.name = "date"  # an col named date is necessary
                    lib.write(stock_name, pdf)
            print("initialize count: {}".format(initialize_count))
            print("tasks: {}".format(date_list))
            a.reset()

            # date_list = [files.split("_")[0] for files in os.listdir("./raw_data_price") if "tar" in files]
            # print(len(date_list))
            date_list = ["20201231"]  # for test
            Parallel(n_jobs=min(2, len(date_list)))(
                delayed(add_data)(date, doc_type, stock_name_dict)
                for date in date_list)
Example #20
 def open(self, store='chunkstore'):
     self.db = Arctic('localhost')
     try:
         self.store = self.db[store]
     except Exception:
         self.db.initialize_library(store, lib_type=CHUNK_STORE)
         self.store = self.db[store]
         self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)
Example #21
    def __init__(self, host):
        #self.MONGOHOST = 'localhost'
        """Initializes the store here if it hasn't been already."""

        try:
            register_library_type(FunStore._LIBRARY_TYPE, FunStore)
        except Exception:
            pass
        self.store = Arctic(host)
Example #22
 def initialize_db(self, lib):
     if lib:
         db = Arctic('localhost')
         if lib not in db.list_libraries():
             self.logger.info(
                 'Data library \'%s\' does not exist -- creating it', lib)
             db.initialize_library(lib, lib_type=TICK_STORE)
         self.dlib = lib
         self.dbconn = db[lib]
Example #23
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")
    # Defaults to VersionStore
    store.initialize_library(library)
    lib = store[library]
    df = pd.read_csv(filename, names=COLUMN_NAMES)
    df.time = pd.to_datetime(df.time, unit='s')
    lib.write(symbol, df, metadata={'source': 'csv'})
Example #24
class ArcticStoreDatabase(Database):
    def __init__(self):
        super().__init__()
        self.db = None
        self.store = None

    def open(self, store='chunkstore'):
        self.db = Arctic('localhost')
        try:
            self.store = self.db[store]
        except Exception:
            self.db.initialize_library(store, lib_type=CHUNK_STORE)
            self.store = self.db[store]
            self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)

    def close(self):
        pass  #no need to close arctic connection

    def remove(self, key):
        self.store.delete(key)  #used for debugging

    def getMetadata(self, key):
        return self.store.read_metadata(key)

    def setMetadata(self, key, metadata):
        self.store.write_metadata(key, metadata)

    def _save(self, key, data):
        if self.has_key(key):
            self.store.append(key, data)
        else:
            self.store.write(key, data, chunk_size=chunkSizes.get(key, 'M'))

    def get(self, key, start=None, end=None, iterator=False):
        if not iterator:
            return self.store.read(key,
                                   chunk_range=DateRange(
                                       start, end, CLOSED_CLOSED))
        else:
            return self.store.iterator(key,
                                       chunk_range=DateRange(
                                           start, end, CLOSED_CLOSED))

    def getLatestRow(self, key):
        latestDate = self.store.read_metadata(key)['end']
        return self.get(key, start=latestDate, end=None)

    def getFirstRow(self, key):
        firstDate = self.store.read_metadata(key)['start']
        return self.get(key, start=None, end=firstDate)

    def has_key(self, key):
        return self.store.has_symbol(key)

    def list_keys(self):
        return self.store.list_symbols()
Example #25
def insert_random_data(config, args, n_rows):
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)
    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]

    for sym in range(args.symbols):
        df = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        lib.write('sym' + str(sym), df)
Example #26
 def __init__(self, lib: str, host: str = 'localhost') -> None:
     """
     Library name is whatToShow + barSize, eg.
     TRADES_1_min
     BID_ASK_1_hour
     MIDPOINT_30_secs
     """
     lib = lib.replace(' ', '_')
     self.db = Arctic(host)
     self.db.initialize_library(lib)
     self.store = self.db[lib]
Example #27
 def _arctic_loader():
     host = Arctic(arctic_opts['host'])
     lib = host.get_library(arctic_opts['library'])
     read_kwargs = {}
     start, end = map(arctic_opts.get, ['start', 'end'])
     if start and end:
         read_kwargs['chunk_range'] = pd.date_range(start, end)
     data = lib.read(arctic_opts['node'], **read_kwargs)
     if isinstance(data, VersionedItem):
         data = data.data
     return data
Example #28
    def setUp(self):
        self.robot_user = "******"
        self.import_comment = "importcomment"
        self.arctic = Arctic('localhost')
        self.library_name = marc.random_library(self.arctic)
        lu.logger.info("Created test library {}".format(self.library_name))
        self.arctic.initialize_library(self.library_name)
        simple_pd = pu.create_simple_series(['a', 'b', 'c'], 5)
        lib = self.arctic[self.library_name]

        import_pandas(lib, simple_pd, "symbol",
                      RevisionInfo(who=self.robot_user,
                                   what=self.import_comment,
                                   when=datetime.datetime.now()))
        import_pandas(lib, simple_pd, "ES.SETL.EOD",
                      RevisionInfo(who=self.robot_user,
                                   what="import something else",
                                   when=datetime.datetime.now()))
Example #29
def _get_lib(lib_name: "lib name(str)" = "default",
             lib_type: "lib type" = VERSION_STORE):
    client = MongoClient(host=config.MONGO_HOST,
                         port=27017,
                         username=config.MONGO_USER,
                         password=config.MONGO_PWD,
                         authSource=config.MONGO_AUTHDB)

    a = Arctic(client)
    if not a.library_exists(lib_name):
        a.initialize_library(lib_name, lib_type=lib_type)
    return a[lib_name]
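A usage sketch for _get_lib; the config.* values are assumed to point at an authenticated MongoDB, and the symbol is illustrative:

import pandas as pd

lib = _get_lib('prices')  # created on first call, reused afterwards
lib.write('AAPL', pd.DataFrame({'close': [129.4]}))
print(lib.read('AAPL').data)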
Example #30
 def init_db_connection(self):
     """
     Initiate database connection
     :return: (void)
     """
     print("init_db_connection for {}...".format(self.sym))
     try:
         self.db = Arctic(MONGO_ENDPOINT)
         self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
         self.collection = self.db[ARCTIC_NAME]
     except PyMongoError as e:
         print("Database.PyMongoError() --> {}".format(e))
Example #31
    def startDB(self, storename='fx'):
        self.con = subprocess.Popen("%s %s %s" %
                                    (self.mongod, "--dbpath", self.dbPath),
                                    shell=True)

        self.store = Arctic(self.address)

        if not self.store.library_exists(storename):
            self.store.initialize_library(storename)

        self.library = self.store[storename]
        self.rc = self.con.returncode
Example #32
def read_from_db(target_building, start_time=None, end_time=None):
    '''
    Load the data for target_building from the DB.
    return:
    {
        point name: data
    }
    data is in pandas.DataFrame format with two columns ['date', 'data']
    '''
    if isinstance(start_time, arrow.Arrow):
        start_time = start_time.datetime
    elif isinstance(start_time, (dt, date)) or start_time is None:
        pass
    else:
        raise ValueError('the type of time value is unknown: {0}'.format(
            type(start_time)))
    if isinstance(end_time, arrow.Arrow):
        end_time = end_time.datetime
    elif isinstance(end_time, (dt, date)) or end_time is None:
        pass
    else:
        raise ValueError('the type of time value is unknown: {0}'.format(
            type(end_time)))
    if start_time and end_time:
        date_range = DateRange(start=start_time, end=end_time)
    else:
        date_range = None

    print('loading timeseries data from db for %s...' % target_building)

    conn = Arctic('localhost')
    if target_building not in conn.list_libraries():
        raise ValueError('%s not found in the DB!' % target_building)
    else:
        lib = conn[target_building]
        srcids = lib.list_symbols()
        res = {}
        for srcid in srcids:
            data = lib.read(srcid, chunk_range=date_range)
            if len(data) == 0:
                print('WARNING: {0} has empty data.'.format(srcid))
                #pdb.set_trace()
                continue

            res[srcid] = data
        print('done loading')

        return res
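A hedged usage sketch for read_from_db; the building name and date range are illustrative:

import arrow

start = arrow.get('2017-01-01')
end = arrow.get('2017-02-01')
res = read_from_db('ebu3b', start_time=start, end_time=end)  # hypothetical building
for srcid, df in res.items():
    print(srcid, len(df))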
Example #33
def main():
    parser = argparse.ArgumentParser(prog="store", description='Store data to DB')
    parser.add_argument('--host', help="MongoDB host", default=MONGO_HOST_DEFAULT, type=str)
    parser.add_argument('--updater', help="Updater", default=UPDATER_DEFAULT, type=str)
    parser.add_argument('-s', '--source', help="Source", default=SOURCE_DEFAULT, type=str)
    parser.add_argument('--symbols', help="Symbol", default=SYMBOLS_DEFAULT, type=str)
    parser.add_argument('--start', help="Start date", default='', type=str)
    parser.add_argument('--end', help="End date", default='', type=str)
    parser.add_argument('--freq', help="Freq", default='', type=str)
    parser.add_argument('--max_rows', help="max_rows", default=10, type=int)
    parser.add_argument('--max_columns', help="max_columns", default=6, type=int)
    parser.add_argument('--api_key', help="API key", default='', type=str)
    parser.add_argument('--expire_after', help="Cache expiration ('0': no cache, '-1': no expiration, 'HH:MM:SS.X': expiration duration)", default='24:00:00.0', type=str)
    args = parser.parse_args()

    pd.set_option('max_rows', args.max_rows)
    pd.set_option('expand_frame_repr', False)
    pd.set_option('max_columns', args.max_columns)

    if args.start != '':
        start = pd.to_datetime(args.start)
    else:
        start = None

    if args.end != '':
        end = pd.to_datetime(args.end)
    else:
        end = None

    if args.freq != '':
        freq = args.freq
    else:
        freq = None

    symbols = args.symbols.split(',')

    session = get_session(args.expire_after, 'cache')
    my_updater = updater(args.updater, session=session)
    if args.api_key != '':
        my_updater.set_credentials(api_key=args.api_key)

    store = Arctic(args.host)
    library_name = my_updater.library_name(args.source, freq)
    print(library_name)
    store.initialize_library(library_name)
    library = store[library_name]

    for symbol in symbols:
        update(library, my_updater, symbol, start, end, freq, args.source.lower())
Example #34
    def __init__(self, database_name, collection_name, host = DEFAULT_MONGO_HOST):

        if database_name is None:
            database_name = DEFAULT_DB

        store = Arctic(host)
        library_name = database_name+"."+collection_name
        store.initialize_library(library_name) # will this fail if already exists??
        library = store[library_name]

        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host

        self.store = store
        self.library_name = library_name
        self.library = library
Example #35
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    # Create Arctic and directly fork/start children (no wait)
    total_iterations = 12
    total_processes = 6
    total_writes_per_child = 20

    global MY_ARCTIC

    for i in range(total_iterations):
        processes = list()

        MY_ARCTIC = Arctic(mongo_host=mongo_host)
        for j in range(total_processes):
            p = Process(target=f, args=(library_name, total_writes_per_child, False))
            p.start()  # start directly, don't wait to create first all children procs
            processes.append(p)

        MY_ARCTIC.initialize_library(library_name, VERSION_STORE)  # this will unblock spinning children

        for p in processes:
            p.join()

        for p in processes:
            assert p.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Example #36
class TimeSuiteWrite(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        self.lib.write('series_bench_compressible', s_compress[idx])
Example #37
class TimeSuiteAppend(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None 

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
Example #38
 def test1(self):
     self.assertEqual(Arctic.get_min_elec_dist([[0, 0], [1, 0], [1, 1], [1, 2], [0, 2]]), 1.00)
Example #39
 def test2(self):
     self.assertEqual(
         Arctic.get_min_elec_dist([[1.0, 1.0], [30.91, 8], [4.0, 7.64], [21.12, 6.0], [11.39, 3.0], [5.31, 11.0]]),
         10.18,
     )
Example #40

def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pd.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # convert_objects() was removed from pandas; coerce columns to numeric
    df = df.apply(pd.to_numeric, errors='coerce')
    return df


################################################
# VersionStore: Storing and updating stock data
################################################



arctic = Arctic('localhost')

# Create a VersionStore library
arctic.delete_library('jblackburn.stocks')
arctic.initialize_library('jblackburn.stocks')
arctic.list_libraries()


stocks = arctic['jblackburn.stocks']

# get some prices
aapl = get_stock_history('aapl', '2015-01-01', '2015-02-01')
aapl

# store them in the library
stocks.write('aapl', aapl, metadata={'source': 'YAHOO'})
Example #41
 def setup(self, arg):
     self.store = Arctic("127.0.0.1")
     self.store.delete_library('test.lib')
     self.store.initialize_library('test.lib')
     self.lib = self.store['test.lib']
Example #42
 def __init__(self):
     self.store = Arctic("127.0.0.1")
Example #43
		params["start"] += 200
	return price_list

from arctic import Arctic 

def download_daily_bars(instrument, start, end):
	bars = get_historical_price(instrument,start,end)
	dump = json.dumps(bars, ensure_ascii=False).encode('utf-8')
	df = pd.read_json(dump)
	print(df.head())
	if df.empty:
		return df
	df = df.set_index('Date')
	return df

store = Arctic('localhost')
if 'KRX_G' not in store.list_libraries():
	store.initialize_library('KRX_G')

lib = store['KRX_G']
krx = pd.read_csv('krx_market_symbols.csv', dtype=object)
stocks = [x for x in krx['code_google']]
print(len(stocks), " symbols")
begin = datetime.date(2000,1,1)
end = datetime.date(2016,5,30)

missing = ['KRX:152550']

#for i, stock in enumerate(stocks[160:769]):
for i, stock in enumerate(missing):
	print("%d th code=%s" % (i, stock))
Example #44
pd.set_option('max_rows', 10)
pd.set_option('expand_frame_repr', False)
pd.set_option('max_columns', 6)

from arctic_updater.updaters.truefx import TrueFXUpdater
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

my_updater = TrueFXUpdater()
symbol = 'EURUSD'
year, month = 2015, 11
%time df = my_updater._read_one_month(symbol, year, month)

# Arctic (MongoDB)
from arctic import Arctic
store = Arctic('localhost')
library_name = 'test'
store.initialize_library(library_name)
library = store[library_name]

%time library.write(symbol, df)
%time df_retrieved = library.read(symbol).data

# HDF5
filename = my_updater._filename(symbol, year, month, '.h5')
%time df.to_hdf(filename, "data", mode='w', complevel=0, complib='zlib', format='table')
%time df_retrieved = pd.read_hdf(filename)

# Make unique index
# http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154
df = df.reset_index()
Example #45
from arctic import Arctic

# Connect to local MongoDB
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('NASDAQ')

# Access the library
library = store['NASDAQ']

# Load some data - maybe from Quandl
import quandl
quandl.ApiConfig.api_key = "Cxzxjy2eHKXgwMjVFhbH"
aapl = quandl.get("GOOG/NASDAQ_AAPL")

# Store the data in the library
library.write('AAPL', aapl, metadata={'source': 'Quandl'})

# Reading the data
item = library.read('AAPL')
aapl = item.data
metadata = item.metadata

Example #46
class MongoDB(object):
    # To start the server, cd to "C:\Program Files\MongoDB\Server\4.0\bin",
    #   then run mongod --dbpath "path to db" so the server points at the
    #   correct save location

    # use dataframe.to_dict('records') to convert DataFrames to dicts for
    #   insertion; the data itself is stored by the MongoDB server

    # use Arctic from AHL to store time series data. It uses a MultiIndex
    #   pd.DataFrame for the data, and other data goes into the metadata
    #   as a dict

    # https://www.mongodb.com/json-and-bson
    # https://stackoverflow.com/questions/20796714/how-do-i-start-mongo-db-from-windows

    # To back up the database, navigate to the bin folder of MongoDB, then run
    #   "mongodump --out ..." where ... is the output folder for the dump
    # To load into another instance, navigate to the bin folder again and use
    #   "mongorestore --drop ..." where ... is the folder where the database
    #   backup folders were dumped
    # Run cmd as administrator for these commands

    def __init__(self):

        self.conn = Arctic("127.0.0.1")

    def verify_library(self, library):
        """ Method to test if library exists in database"""

        libs = self.conn.list_libraries()
        if library in libs:
            return
        # if the library doesn't exist, alert and ask if we should create it
        else:
            print('"%s" library does not exist in server data path' % library)
            create = input('create "%s"? (y, n)' % library)
            if create.lower() == 'y':
                self.conn.initialize_library(library)
            else:
                return

    @staticmethod
    def getMultiIndex(data, index, columns=None):
        """
        Method to convert a DataFrame to MultiIndex

        data: pd.DataFrame
            data to be MultiIndexed
        index: list
            list of strings containing names of columns to be used as the
            MultiIndex (in order, left to right)
        columns: unused, kept for interface compatibility
        """

        data = data.set_index(index)
        return data

    @timeMe
    def save(self, data, library, meta_data=None, append=True):
        """
        Method to save to a MongoDB library using the Arctic database.

        Parameters
        ----------

            data: pd.DataFrame (MultiIndex)
                DataFrame containing data for the database in MultiIndex structure.
                Outer index must be the index used for the Arctic "symbol" in the "library"
            library: str
                which library to write to
            meta_data: dict
                Dictionary of metadata values to include in the save.
                If not provided, existing metadata in the database is kept or set to None.
                Keys match "symbol" from the data's outer index.
            append: bool
                True will append data to existing data for each "symbol" in the database;
                False will replace the data entirely

        """
        # meta_data is mutated below, so avoid a mutable default argument
        if meta_data is None:
            meta_data = {}

        self.verify_library(library)

        if isinstance(data, pd.DataFrame):

            # get unique symbols to write
            indices = list(data.index.unique())
            # take the first index level as the "symbol" for the database
            symbols = indices if isinstance(indices[0], str) else np.unique([x[0] for x in indices])
            symbol_list = self.conn[library].list_symbols()

            # fill in blank metadata for symbols not in metadata
            noMeta = np.array(symbols)[~np.in1d(symbols, list(meta_data.keys()))]
            meta_data.update({x: None for x in noMeta})

            for sym in symbols:
                data_cut = data.loc[sym]
                data_cut = pd.DataFrame(data_cut).T if isinstance(data_cut, pd.Series) else data_cut

                if sym in symbol_list:
                    # get current data for symbol
                    db = self.conn[library].read(sym)
                    db_data = db.data
                    # if metadata is present in the database but None was fed
                    # in for this symbol, take it from the database
                    if meta_data.get(sym) is None:
                        meta_data[sym] = db.metadata

                # if the symbol is already in the database and we want to "update" the series rather than simply replace
                if sym in symbol_list and append:
                    data_post = pd.concat([db_data, data_cut], axis=0)
                    # remove duplicates and keep latest data
                    if len(data_post.index.unique()) == 1:
                        data_post = data_post.drop_duplicates(keep='last')
                    else:
                        data_post = data_post.groupby(data_post.index).last()
                else:
                    data_post = data_cut

                # sort the data by the indices
                data_post = data_post.sort_index()

                self.conn[library].write(sym, data_post, metadata=meta_data[sym])

    @timeMe
    def read(self, library, symbols=None):
        """
        Method to read from a MongoDB library using the Arctic database.

        Parameters
        ----------
            library: str
                which library to read from
            symbols:
                list of "symbols" in "library" to read

        """

        if symbols is None:
            symbols = self.conn[library].list_symbols()

        data = {}
        for sym in symbols:
            db = self.conn[library].read(sym)
            data[sym] = {}
            data[sym]['data'] = db.data
            if db.metadata is not None:
                data[sym].update(db.metadata)

        return data
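A usage sketch for the MongoDB wrapper above; the symbol and fields are illustrative, timeMe is assumed to be a timing decorator defined elsewhere, and verify_library() will prompt before creating a missing library:

mdb = MongoDB()
df = pd.DataFrame({
    'symbol': ['AAPL', 'AAPL'],
    'date': pd.to_datetime(['2021-01-04', '2021-01-05']),
    'close': [129.4, 131.0],
})
# outer index level 'symbol' becomes the Arctic symbol, 'date' the inner index
mdb.save(mdb.getMultiIndex(df, ['symbol', 'date']), 'prices',
         meta_data={'AAPL': {'source': 'demo'}})
print(mdb.read('prices', symbols=['AAPL'])['AAPL']['data'])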
Example #47
    def __init__(self):

        self.conn = Arctic("127.0.0.1")
# Loading data
################################################


def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # convert_objects() was removed from pandas; coerce columns to numeric
    df = df.apply(pandas.to_numeric, errors='coerce')
    return df


################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic('localhost')
lst = arctic.list_libraries()
if 'KRX' not in lst:
    arctic.initialize_library('KRX')

#################################
# Dealing with lots of data
#################################


# KRX library
lib = arctic['KRX']

def load_all_stock_history_KRX():
    # 
Example #49
#
#  Arctic Key-Value store
#

from datetime import datetime as dt

import pandas as pd

from arctic import Arctic

# Connect to the mongo-host / cluster
store = Arctic(mongo_host)

# Data is grouped into 'libraries'.
# Users may have one or more named libraries:
store.list_libraries()

# Create a library
store.initialize_library('username.scratch')

# Get a library
# library = m['username.<library>']
library = store['username.scratch']

# Store some data in the library
df = pd.DataFrame({'prices': [1, 2, 3]},
                  [dt(2014, 1, 1), dt(2014, 1, 2), dt(2014, 1, 3)])
library.write('SYMBOL', df)

# Read some data from the library
# (Note the returned object has an associated version number and metadata.)
item = library.read('SYMBOL')
Example #50
        self._arctic_lib.check_quota()
        self._collection.insert_one(to_store)

    @mongo_retry
    def delete(self, query):
        """
        Simple delete method
        """
        self._collection.delete_one(query)


# Hook the class in for the type string 'CustomArcticLibType'
register_library_type(CustomArcticLibType._LIBRARY_TYPE, CustomArcticLibType)

# Create an Arctic instance pointed at a mongo host
store = Arctic(mongo_host)

### Initialize the library
# Map username.custom_lib -> CustomArcticLibType
store.initialize_library("username.custom_lib", CustomArcticLibType._LIBRARY_TYPE)

# Now pull our username.custom_lib ; note that it has the:
#   - query(...)
#   - store(...)
#   - delete(...)
# API we defined above
lib = store["username.custom_lib"]


# Store some items in the custom library type
lib.store(Stuff("thing", dt(2012, 1, 1), object()))
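The comments above advertise a query(...)/store(...)/delete(...) API; a hedged continuation, with a field name that is only a guess at the Stuff record's layout:

# Query the items back out, then delete them with the same query
for item in lib.query({'field1': 'thing'}):  # 'field1' is hypothetical
    print(item)
lib.delete({'field1': 'thing'})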