Exemple #1
0
    def import_data(self, doc_type_l=("Tick", "Transaction", "Order")):
        """Import raw market-data files into Arctic libraries.

        Clears the working folders, sets a storage quota on every known
        document-type library, seeds a placeholder frame for any stock not
        yet present, then dispatches per-date imports in parallel.

        :param doc_type_l: document types to import. A tuple default is
            used instead of the original list to avoid the shared
            mutable-default-argument pitfall.
        """
        # Clear all the old files before a fresh import.
        for fp in LOG_FILE_PATH, DATA_INFO_PATH, DATA_FINISH_INFO_PATH, DATA_PATH:
            self._get_empty_folder(fp)

        arc = Arctic(ARCTIC_SRV)
        for doc_type in DOC_TYPE:
            # arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
            arc.set_quota(get_library_name(doc_type), MAX_SIZE)
        arc.reset()

        # The stock universe is identical for every doc_type, so read it
        # once instead of once per iteration (loop-invariant hoisted).
        with open(ALL_STOCK_PATH, "r") as f:
            stock_name_list = [line.split("\t")[0] for line in f.readlines()]
        stock_name_dict = {
            "SH": [name[2:] for name in stock_name_list if "SH" in name],
            "SZ": [name[2:] for name in stock_name_list if "SZ" in name],
        }

        for doc_type in doc_type_l:
            # Distinct dates available on disk for this doc_type.
            date_list = list({
                int(path.split("_")[0])
                for path in os.listdir(DATABASE_PATH) if doc_type in path
            })
            date_list.sort()
            date_list = [str(date) for date in date_list]

            lib_name = get_library_name(doc_type)
            a = Arctic(ARCTIC_SRV)
            # a.initialize_library(lib_name, lib_type=CHUNK_STORE)

            stock_name_exist = a[lib_name].list_symbols()
            lib = a[lib_name]
            initialize_count = 0
            for stock_name in stock_name_list:
                if stock_name not in stock_name_exist:
                    initialize_count += 1
                    # A placeholder for stocks
                    pdf = pd.DataFrame(index=[pd.Timestamp("1900-01-01")])
                    pdf.index.name = "date"  # an index named date is necessary
                    lib.write(stock_name, pdf)
            print("initialize count: {}".format(initialize_count))
            print("tasks: {}".format(date_list))
            a.reset()

            date_list = ["20201231"]  # for test
            Parallel(n_jobs=min(2, len(date_list)))(
                delayed(add_data)(date, doc_type, stock_name_dict)
                for date in date_list)
Exemple #2
0
def add_one_stock_daily_data_wrapper(filepath, type, exchange_place, index,
                                     date):
    """Import one stock's daily CSV into Arctic, logging problems per process.

    Timestamp errors reported by add_one_stock_daily_data are appended to a
    temp_timestamp_error_* log; any exception is caught and appended to a
    temp_fail_* log so one bad file never kills the whole batch.

    :param filepath: CSV path; the stock code is derived from its basename.
    :param type: document type string. NOTE: shadows the builtin, but the
        name is kept since renaming would break keyword callers.
    :param exchange_place: exchange prefix, combined with the code in logs.
    :param index: position in the batch, used for progress printing.
    :param date: trading date being imported.
    """
    pid = os.getpid()
    code = os.path.split(filepath)[-1].split(".csv")[0]
    arc = Arctic(ARCTIC_SRV)
    try:
        if index % 100 == 0:
            print("index = {}, filepath = {}".format(index, filepath))
        error_index_list = add_one_stock_daily_data(filepath, type,
                                                    exchange_place, arc, date)
        if error_index_list is not None and len(error_index_list) > 0:
            log_path = os.path.join(
                LOG_FILE_PATH,
                "temp_timestamp_error_{0}_{1}_{2}.txt".format(pid, date, type))
            # `with` guarantees the handle is closed even if write() raises
            # (the original leaked the handle in that case).
            with open(log_path, "a+") as f:
                f.write("{}, {}, {}\n".format(filepath, error_index_list,
                                              exchange_place + "_" + code))

    except Exception as e:
        info = traceback.format_exc()
        print("error:" + str(e))
        fail_path = os.path.join(
            LOG_FILE_PATH, "temp_fail_{0}_{1}_{2}.txt".format(pid, date, type))
        with open(fail_path, "a+") as f:
            f.write("fail:" + str(filepath) + "\n" + str(e) + "\n" + str(info) +
                    "\n")

    finally:
        arc.reset()
Exemple #3
0
 def __init__(self, use_arctic=False, z_score=True, alpha=None):
     """
     Simulator constructor.

     :param use_arctic: If TRUE, a connection to Arctic is attempted;
                        otherwise no connection is made.
     :param z_score: If TRUE, data is normalised with a z-score scaler,
                     otherwise a min-max scaler is used.
     :param alpha: smoothing factor forwarded to load_ema().
     """
     if z_score:
         self._scaler = StandardScaler()
     else:
         self._scaler = MinMaxScaler()
     self.cwd = os.path.dirname(os.path.realpath(__file__))
     self.ema = load_ema(alpha=alpha)
     # Default to "no connection"; overwritten below on success.
     self.arctic = self.library = None
     try:
         if use_arctic:
             print('Attempting to connect to Arctic...')
             self.arctic = Arctic(MONGO_ENDPOINT)
             self.arctic.initialize_library(ARCTIC_NAME,
                                            lib_type=TICK_STORE)
             self.library = self.arctic[ARCTIC_NAME]
             print('Connected to Arctic.')
     except Exception as ex:
         self.arctic = self.library = None
         print('Unable to connect to Arctic database')
         print(ex)
Exemple #4
0
    def __init__(self, start_t=date(2010, 1, 1), end_t=date(2016, 12, 31)):
        """Load market data for [start_t, end_t], cached as a gzip pickle.

        Reads the pickle cache when present; otherwise pulls the data from
        the Arctic chunk store and writes the cache.

        :param start_t: inclusive start date of the data window.
        :param end_t: inclusive end date of the data window.
        """
        # Cache path is keyed on the date span.  FIX: the original called
        # start_t.isocalendar() (a tuple, embedding e.g. "(2010, 53, 5)"
        # in the file name) while using isoformat() for end_t; isoformat()
        # is the intended, stable key for both.
        # NOTE: ideally the key would also hash FEATURES (see the original
        # author's comment); left as-is to keep behavior minimal.
        cache_file_path = os.path.join(
            "/tmp",
            f"TestTSDataGenerator_{start_t.isoformat()}_{end_t.isoformat()}.pkl"
        )
        self.original_data: pd.DataFrame = None
        self.df_x_to_loop: pd.DataFrame = None
        self.df_y_to_loop: pd.DataFrame = None

        if os.path.isfile(cache_file_path):
            self.original_data = pd.read_pickle(cache_file_path,
                                                compression="gzip")
        else:  # read from Arctic and cache the result
            arctic_store = Arctic(get_mongo_admin_conn_str())
            lib_name = "jy_chn_equity_otvn_chunkstore"
            lib_chunk_store = arctic_store[lib_name]
            symbol_name = "mkt_data"
            self.original_data: pd.DataFrame = lib_chunk_store.read(
                symbol_name,
                chunk_range=pd.date_range(start_t, end_t),
                filter_data=True,
                columns=self.FEATURES)
            self.original_data.to_pickle(cache_file_path,
                                         compression="gzip",
                                         protocol=4)
Exemple #5
0
def sync():
    """Fetch candle data and persist it to the local 'Bitmex' library."""
    connection = Arctic('localhost')
    connection.initialize_library('Bitmex')
    candles = get_candle_pandas()
    print(candles)
    connection['Bitmex'].write('XBTUSD', candles, metadata={'source': 'Bitmex'})
Exemple #6
0
def show():
    """Read XBTUSD candles from the local 'Bitmex' library, derive simple
    returns plus 10- and 30-period annualised volatility, and plot both."""
    # Connect to local MongoDB; library defaults to VersionStore.
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']

    # Read the stored item and unpack data + metadata.
    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print(xbtusd)
    print(metadata)

    # Simple returns from consecutive closes.
    xbtusd['ret'] = -1 + xbtusd['close'] / xbtusd['close'].shift(1)

    from math import sqrt

    # Rolling volatility, annualised with sqrt(260) trading days.
    annualise = sqrt(260)
    xbtusd['vol10'] = annualise * xbtusd['ret'].rolling(10).std(ddof=0)
    xbtusd['vol30'] = annualise * xbtusd['ret'].rolling(30).std(ddof=0)

    plt.plot(xbtusd.index, xbtusd['vol10'], label='vol10')
    plt.plot(xbtusd.index, xbtusd['vol30'], label='vol30')
    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)

    plt.show()
    def __init__(self, TYPE):
        """
        :param TYPE: statement kind — 'BS', 'IS' or 'CF'.
        """
        ############ SETTING #############
        cfg = GetConfig()
        self.config = cfg
        self.TYPE = TYPE  # 'BS', 'IS', 'CF'
        self.MONGO = cfg.MONGO
        self.CSV = cfg.CSV
        self.RAW = False
        self.outdir = cfg.fund_dir
        self.encode = cfg.encode
        self.proxypool = cfg.proxypool

        ############ CHANGE ABOVE SETTING #############

        if self.MONGO:
            from arctic import Arctic
            # mongod --dbpath D:/idwzx/project/arctic
            lib_name = 'ashare_{}'.format(self.TYPE)
            conn = Arctic(cfg.ahost)
            conn.initialize_library(lib_name)
            self.lib = conn[lib_name]

        self.result_dict = {}
Exemple #8
0
 def __init__(self, save_to_file: bool = True, host: str = 'localhost',
              library: str = 'tick_blotter',
              collection: str = 'test_blotter') -> None:
     """Open (creating if needed) the tick store used to persist blotter
     rows, and remember the target collection name."""
     connection = Arctic(host)
     connection.initialize_library(library, lib_type=TICK_STORE)
     self.db = connection
     self.store = connection[library]
     self.collection = collection
Exemple #9
0
    def __init__(self, use_arctic=False):
        """
        :param use_arctic: If True, a connection to Arctic is attempted;
                            otherwise no connection is made.
        """
        self._avg = None
        self._std = None
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.z_score = lambda x: (x - self._avg) / self._std
        # Guard clause: skip the whole connection attempt when disabled.
        if not use_arctic:
            print('Not connecting to Arctic')
            self.arctic, self.library = None, None
            return
        try:
            print('Attempting to connect to Arctic...')
            self.arctic = Arctic(MONGO_ENDPOINT)
            self.arctic.initialize_library(ARCTIC_NAME,
                                           lib_type=TICK_STORE)
            self.library = self.arctic[ARCTIC_NAME]
            print('Connected to Arctic.')
        except Exception as ex:
            self.arctic, self.library = None, None
            print('Unable to connect to Arctic database')
            print(ex)
    def __init__(self, stocks=('QQQ', 'TQQQ', 'SQQQ'), db='sqlite', storage_dir=None, db_file='stock_data.sqlite'):
        """Set up storage for downloaded stock data.

        :param stocks: tickers to track. FIX: the original used a mutable
            list default (shared across calls); a tuple default is used and
            copied into a fresh list per instance.
        :param db: backend — 'sqlite' opens a SQLAlchemy engine, 'arctic'
            opens the local Arctic library; anything else leaves both off.
        :param storage_dir: data folder; defaults to ~/.yfinance_data
            (created if missing).
        :param db_file: sqlite file name inside storage_dir.
        """
        self.db = db
        if storage_dir is None:
            home_dir = os.path.expanduser("~")
            self.storage_dir = os.path.join(home_dir, '.yfinance_data')
            if not os.path.exists(self.storage_dir):
                os.makedirs(self.storage_dir)
        else:
            self.storage_dir = storage_dir

        if db == 'sqlite':
            self.db_file = db_file
            # 4 slashes for absolute path: https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite
            self.e = create_engine('sqlite:///{}/{}'.format(self.storage_dir, self.db_file))
            self.con = self.e.connect()
        else:
            self.e = None
            self.con = None

        if db == 'arctic':
            self.store = Arctic('localhost')
            self.store.initialize_library('yfinance_stockdata')
            self.library = self.store['yfinance_stockdata']

        # Fresh list per instance so callers can mutate it safely.
        self.stocks = list(stocks)
    def __init__(self,
                 collection_name,
                 database_name=None,
                 host=None,
                 port=None):
        """Resolve mongo defaults and open (creating if necessary) the
        Arctic library named '<database>.<collection>'.

        Note: Arctic itself does not accept a port, so the resolved port
        from mongo_defaults is discarded.
        """
        database_name, host, _port_not_used = mongo_defaults(db=database_name,
                                                             host=host,
                                                             port=port)

        library_name = database_name + "." + collection_name
        store = Arctic(host)
        # will this fail if already exists?? (kept from original review note)
        store.initialize_library(library_name)

        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host
        self.store = store
        self.library_name = library_name
        self.library = store[library_name]
def getArcticLibraries():
    """Return the list of library names in the local Arctic store."""
    print(">>> Function called getArcticLibraries()")
    # Connect to local MONGODB
    store = Arctic('localhost')
    # FIX: this line was a Python-2 print *statement* — a SyntaxError under
    # Python 3 and inconsistent with the print() calls around it.
    print("+ Arctic connected to MongoDB at localhost")
    print("+ Requesting libraries from Arctic store")
    return store.list_libraries()
Exemple #13
0
 def __init__(self):
     """Connect to Arctic and prepare the worker processes and queues used
     to stream tick data; on any failure all handles are left as None."""
     try:
         print('Attempting to connect to Arctic...')
         self.scaler = MinMaxScaler()
         self.arctic = Arctic(MONGO_ENDPOINT)
         self.arctic.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
         self.library = self.arctic[ARCTIC_NAME]
         self.reference_data_old_names = [
             'system_time', 'day', 'coinbase_midpoint'
         ]
         self.reference_data_new_names = ['t', 'd', 'm']
         worker_count = cpu_count()
         self.number_of_workers = worker_count
         # Bounded queues so producers block instead of ballooning memory.
         self.queue = Queue(maxsize=worker_count)
         self.return_queue = Queue(maxsize=worker_count)
         self.workers = []
         for num in range(worker_count):
             self.workers.append(
                 Process(name='Process-%i' % num,
                         target=self._do_work,
                         args=(self.queue, self.return_queue)))
         print('Connected to Arctic.')
     except Exception as ex:
         self.arctic, self.library, self.workers = None, None, None
         print('Unable to connect to Arctic database')
         print(ex)
def getSubsetFromArcticStore(tickerName, arcticLibraryName, start_date,
                             end_date):
    """Read tickerName from the given Arctic library over [start_date,
    end_date].

    Returns the DataFrame, or None when the symbol is missing, unreadable,
    or returns an empty frame.

    FIX: converted remaining Python-2 syntax — two print statements and
    `except Exception, e:` were SyntaxErrors under Python 3, inconsistent
    with the print() calls already in this function.
    """
    print(">>> Function called getSubsetFromArcticStore(" + tickerName + ", " +
          arcticLibraryName + ")")
    # Connect to local MONGODB
    source = 'localhost'
    store = Arctic(source)
    print("+ Arctic connected to MongoDB at " + source)

    # Access the library
    library = store[arcticLibraryName]

    # base = datetime.datetime.today()
    # date_list = [base - datetime.timedelta(days=x) for x in range(0, 365)]

    # Reading the data
    try:
        print("+ Arctic reading " + tickerName + " from " + arcticLibraryName)
        item = library.read(tickerName,
                            date_range=DateRange(start_date, end_date))
        if isEmptyDataFrame(item.data):
            print("! Error no data returned for " + tickerName)
        else:
            return item.data
    except Exception as e:
        print("! Error " + tickerName + " not found in " + arcticLibraryName +
              ": " + str(e))
Exemple #15
0
    def save_to_arctic(self):
        """Pivot all close prices into one column per symbol and write each
        symbol's NaN-free series into the jy_equity_closeprice library.

        See https://github.com/manahl/arctic/blob/master/howtos/201507_demo_pydata.py
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        closeprice_lib = arctic_store["jy_equity_closeprice"]

        raw = self.load_all_close_price()
        # Wide frame: one row per timestamp 't', one column per symbol 'o'.
        pivoted = raw.pivot_table(values="close_price",
                                  index="t",
                                  columns="o",
                                  aggfunc=np.mean)

        for i, col in enumerate(pivoted.columns, start=1):
            series = pivoted.loc[:, col].dropna(axis=0)
            closeprice_lib.write(col, series)
            print(f"{i}:{col}")

        print(closeprice_lib.list_symbols())
Exemple #16
0
    def save_to_chunkstore_per_symbol(self):
        """Rebuild the jy_equity_mkt_data chunk store from scratch: one
        daily-chunked symbol per pivoted close-price column."""
        lib_name = "jy_equity_mkt_data"
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib_chunk_store = arctic_store[lib_name]

        # Wide frame: one row per timestamp, one column per symbol.
        pivoted = self.load_all_close_price().pivot_table(
            values="close_price",
            index="t",
            columns="o",
            aggfunc=np.mean)
        pivoted.index.rename("date", inplace=True)

        for i, col in enumerate(pivoted.columns, start=1):
            series = pivoted.loc[:, col].dropna(axis=0)
            lib_chunk_store.write(col,
                                  series,
                                  chunker=DateChunker(),
                                  chunk_size="D")
            # Progress every other symbol.
            if i % 2 == 0:
                print(f"{i}:{col}")
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    """Stress-test Arctic's fork safety: children are started *before* the
    parent initializes the library, so each iteration races the children's
    writes against the parent's initialize_library() call.

    NOTE(review): statement order here is the whole point of the test —
    do not reorder.
    """
    # Create Arctic and directly fork/start children (no wait)
    total_iterations = 12
    total_processes = 6
    total_writes_per_child = 20

    # Module-global so forked children inherit the same Arctic instance.
    global MY_ARCTIC

    for i in range(total_iterations):
        processes = list()

        MY_ARCTIC = Arctic(mongo_host=mongo_host)
        for j in range(total_processes):
            p = Process(target=f,
                        args=(library_name, total_writes_per_child, False))
            p.start(
            )  # start directly, don't wait to create first all children procs
            processes.append(p)

        MY_ARCTIC.initialize_library(
            library_name, VERSION_STORE)  # this will unblock spinning children

        for p in processes:
            p.join()

        # Every child must have completed its writes without crashing.
        for p in processes:
            assert p.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
def test_multiprocessing_safety(mongo_host, library_name):
    """Initialize the library in the parent first, then fan out many child
    writers simultaneously and require that every child exits cleanly."""
    total_processes = 64
    total_writes_per_child = 100

    register_get_auth_hook(my_auth_hook)

    global MY_ARCTIC
    MY_ARCTIC = Arctic(mongo_host=mongo_host)

    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    children = []
    for _ in range(total_processes):
        children.append(
            Process(target=f, args=(library_name, total_writes_per_child, True)))

    # Start all children as close together as possible, then wait.
    for child in children:
        child.start()

    for child in children:
        child.join()

    for child in children:
        assert child.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
 def open(self, store='chunkstore'):
     """Open the named chunk-store library, creating it (with a storage
     quota) when the lookup fails.

     :param store: library name, default 'chunkstore'.
     """
     self.db = Arctic('localhost')
     try:
         self.store = self.db[store]
     except Exception:
         # FIX: narrowed from a bare `except:`, which would also swallow
         # KeyboardInterrupt/SystemExit. Kept at Exception because the
         # exact error Arctic raises for a missing library isn't visible
         # here — TODO confirm and narrow further.
         self.db.initialize_library(store, lib_type=CHUNK_STORE)
         self.store = self.db[store]
         self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)
Exemple #20
0
def arctic(mongo_server_module):
    """Fixture: return a cache-disabled Arctic handle backed by freshly
    dropped 'arctic' and per-user 'arctic_<user>' databases."""
    from arctic import Arctic

    disable_arctic_cache(mongo_server_module.api)
    for db_name in ("arctic", "arctic_{}".format(getpass.getuser())):
        mongo_server_module.api.drop_database(db_name)
    return Arctic(mongo_server_module.api)
Exemple #21
0
 def show_chunk_store_info(self):
     """Debug helper: print the symbols, close_price info and chunk ranges
     of the jy_otv_chunkstore library."""
     lib = Arctic(get_mongo_admin_conn_str())["jy_otv_chunkstore"]
     print("list_symbols")
     print(lib.list_symbols())
     print("get_info")
     print(lib.get_info("close_price"))
     print("chunk_ranges")
     print(list(lib.get_chunk_ranges("close_price")))
Exemple #22
0
    def __init__(self, host):
        """Register the FunStore library type (idempotent) and connect.

        :param host: Mongo host passed straight to Arctic.
        """
        # register_library_type raises when the type is already registered;
        # that is expected on repeat construction, so it is ignored.
        try:
            register_library_type(FunStore._LIBRARY_TYPE, FunStore)
        except Exception:
            pass
        self.store = Arctic(host)
Exemple #23
0
    def build_db(self, n_episodes, fld):
        """Build an episode database from stored quotes and pickle it to *fld*.

        The resulting db is a list; each element is a (price-array, label)
        pair, where the label string encodes the episode index and the
        episode's date range.  (Translated from the original Chinese
        docstring.)

        :param n_episodes: requested number of episodes; capped by the
            amount of available quote data.
        :param fld: output folder — deleted and recreated on every call.
        """
        store = Arctic('localhost')
        library = store['EOD_PASSTHROUGH']
        self.quote = library.read("sz002024")['ac']
        # Slice the quotes into an integral number of window_episode-sized
        # episodes, keeping only the most recent data (translation of the
        # string note below).
        '''
		根据n_episodes和self.window_episode来slice行情数据,取整数个window_episode
		'''
        episode = len(self.quote.index) // self.window_episode
        if (n_episodes > episode):
            n_episodes = episode
        self.quote = self.quote[-n_episodes * self.window_episode:]

        db = []
        for i in range(n_episodes):
            #prices, title = self.sample()

            prices = []
            p = []

            # One contiguous window of quotes for this episode.
            part = self.quote[i * self.window_episode:((i + 1) *
                                                       self.window_episode)]
            # Label: "<start date>~<end date>" of the window.
            idx = part.index[0].strftime(
                '%Y-%m-%d') + "~" + part.index[-1].strftime('%Y-%m-%d')
            '''
			while True:
				p = np.append(p, self.quote(full_episode=False)[0])
				if len(p) > self.window_episode:
					break
			base, base_title = self.__rand_sin(
				period_range=self.base_period_range,
				amplitude_range=self.base_amplitude_range,
				noise_amplitude_ratio=0.,
				full_episode=True)
			'''
            # p = [x for x in part.values]
            prices.append(np.array(part.values))
            # return np.array(prices).T,

            db.append((np.array(prices).T, '[%i]_' % i + idx))

        print(db)

        # Rebuild the output folder from scratch, then persist the episode
        # db and the parameters used to create it.
        if os.path.exists(fld):
            shutil.rmtree(fld)
        os.makedirs(fld)
        # os.makedirs()	# don't overwrite existing fld
        pickle.dump(db, open(os.path.join(fld, 'db.pickle'), 'wb'))
        param = {'n_episodes': n_episodes}
        for k in self.attrs:
            param[k] = getattr(self, k)
        json.dump(param, open(os.path.join(fld, 'param.json'), 'w'))
Exemple #24
0
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    """Load a trades CSV and write it into the given Arctic library under
    *symbol* (library is created on demand, defaulting to VersionStore)."""
    store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")
    store.initialize_library(library)
    trades = pd.read_csv(filename, names=COLUMN_NAMES)
    # Epoch seconds -> timestamps.
    trades.time = pd.to_datetime(trades.time, unit='s')
    store[library].write(symbol, trades, metadata={'source': 'csv'})
Exemple #25
0
 def initialize_db(self, lib):
     """Open the local Arctic tick store *lib*, creating the library first
     when it does not exist; no-op when lib is falsy."""
     if not lib:
         return
     db = Arctic('localhost')
     if lib not in db.list_libraries():
         self.logger.info(
             'Data library \'%s\' does not exist -- creating it', lib)
         db.initialize_library(lib, lib_type=TICK_STORE)
     self.dlib = lib
     self.dbconn = db[lib]
Exemple #26
0
def init():
    """Connect to the local MongoDB (runs locally; port 27018 is not
    exposed externally — translated from the original Chinese note) and
    return the bert2vec.guizhou collection plus the 'bert2vec' Arctic
    library."""
    client = pymongo.MongoClient(host='localhost', port=27018)
    collection = client.bert2vec.guizhou
    arctic_store = Arctic('localhost')
    return collection, arctic_store['bert2vec']
Exemple #27
0
def append_random_rows(config, args, n_rows):
    """Benchmark helper: append a random one-minute dataset to every
    symbol, repeated args.appends times."""
    store = Arctic(args.mongodb, app_name="benchmark")
    lib = store[lib_name_from_args(config)]

    for _ in range(args.appends):
        for sym_idx in range(args.symbols):
            frame = gen_oneminute_dataset(n_row=APPEND_NROWS, n_col=n_rows,
                                          dense=False)
            lib.append('sym' + str(sym_idx), frame)
Exemple #28
0
def insert_random_data(config, args, n_rows):
    """Benchmark helper: recreate the library, then write one random
    n_rows x n_rows dataset per symbol."""
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)
    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')
    lib = store[lib_name]

    for sym_idx in range(args.symbols):
        frame = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows,
                                      dense=args.dense)
        lib.write('sym' + str(sym_idx), frame)
Exemple #29
0
 def __init__(self, connection_string=None):
     """Create an Arctic store handle.

     :param connection_string: Mongo host / connection string; defaults to
         "localhost" when not supplied.
     """
     # FIX: the original assigned `setting = "localhost"` and then checked
     # `if setting is None: raise ValueError("")` — an unreachable dead
     # branch. The effective behavior is simply a "localhost" fallback.
     if connection_string is not None:
         self.connection_string = connection_string
     else:
         self.connection_string = "localhost"
     self.store = Arctic(self.connection_string)
Exemple #30
0
 def __init__(self, hub: Hub,
              exchange: Exchange,
              market: Market,
              date_range: Optional[DateRange] = None
              ) -> None:
     """Bind this processor to one exchange library and market symbol in
     the local Arctic store, and set up its trade publisher."""
     self.library_name = str(exchange)
     self.symbol = str(market)
     store = Arctic('localhost')
     self.store = store
     self.library = store[self.library_name]
     self.date_range = date_range
     self.publisher = Publisher(hub, prefix='arctic_trade_processor')