def setUpClass(cls):
    """Connect orca and load ``USPricesSample.csv`` through orca and pandas.

    The orca result is stored on ``Csv.odf_csv`` and the pandas result on
    ``Csv.pdf_csv``; each reader gets its own explicit column dtypes.
    """
    # The sample lives in setup/data relative to this file; use forward
    # slashes in the final path.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    csv_path = os.path.join(data_dir, 'USPricesSample.csv').replace('\\', '/')

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # Frame read through orca.
    orca_dtypes = {
        "PERMNO": np.int32,
        "date": 'DATE',
        "TRDSTAT": 'SYMBOL',
        "DLSTCD": np.float32,
        "DLPRC": np.float32,
        "VOL": np.float32,
        "SHROUT": np.float32,
    }
    Csv.odf_csv = orca.read_csv(csv_path, dtype=orca_dtypes)

    # Frame read through pandas; column 1 is parsed as dates.
    pandas_dtypes = {
        "PERMNO": np.int32,
        "SHRCD": np.int32,
        "HEXCD": np.int32,
        "DLSTCD": np.float32,
        "DLPRC": np.float32,
        "VOL": np.float32,
        "SHROUT": np.float32,
    }
    Csv.pdf_csv = pd.read_csv(csv_path, parse_dates=[1], dtype=pandas_dtypes)
def setUpClass(cls):
    """Connect orca and load the left/right join tables.

    Each side is stored on ``Csv`` three times: read from CSV through orca
    (``odf_csv_*``), read from the ``dfs://testjoinDB`` database through
    orca (``odfs_csv_*``), and read from CSV through pandas (``pdf_csv_*``).
    """
    # Both CSVs live in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    left_path = os.path.join(
        data_dir, 'test_join_left_table.csv').replace('\\', '/')
    right_path = os.path.join(
        data_dir, 'test_join_right_table.csv').replace('\\', '/')
    dfs_db = "dfs://testjoinDB"

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # Helper defined elsewhere; presumably it creates the 'tb_left' and
    # 'tb_right' tables read below -- confirm against its definition.
    _create_odf_csv(left_path, right_path)

    # Left table.
    Csv.odf_csv_left = orca.read_csv(left_path, dtype={"TRDSTAT": "SYMBOL"})
    Csv.odfs_csv_left = orca.read_table(dfs_db, 'tb_left')
    Csv.pdf_csv_left = pd.read_csv(left_path, parse_dates=[1])

    # Right table.
    Csv.odf_csv_right = orca.read_csv(right_path, dtype={"TRDSTAT": "SYMBOL"})
    Csv.odfs_csv_right = orca.read_table(dfs_db, 'tb_right')
    Csv.pdf_csv_right = pd.read_csv(right_path, parse_dates=[0])
def setUpClass(cls):
    """Connect orca and build the CSV-backed and synthetic fixtures.

    Populates ``Csv.pdf_csv`` / ``Csv.odfs_csv`` from ``USPricesSample.csv``
    and ``FromDataframe.pdf`` / ``FromDataframe.odfs`` from a generated
    ``n``-row pandas DataFrame.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    DATA_DIR = path.abspath(path.join(__file__, "../setup/data"))
    fileName = 'USPricesSample.csv'
    data = os.path.join(DATA_DIR, fileName)
    data = data.replace('\\', '/')

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    Csv.pdf_csv = pd.read_csv(
        data,
        parse_dates=[1],
        dtype={
            "PERMNO": np.int32,
            "SHRCD": np.int32,
            "HEXCD": np.int32,
            "DLSTCD": np.float32,
            "DLPRC": np.float32,
            "VOL": np.float32,
            "SHROUT": np.float32,
        },
    )
    # Helper defined elsewhere in the file.
    Csv.odfs_csv = _create_odf_csv(data)

    # Note that n should be a multiple of 10: every column below repeats
    # 10 distinct values ``repeats`` times to reach exactly n rows.
    n = 100
    # Integer division: np.repeat needs an integer count, and true division
    # would produce a float (10.0) that NumPy refuses to cast.
    repeats = n // 10
    FromDataframe.pdf = pd.DataFrame({
        "id": np.arange(1, n + 1, 1, dtype='int32'),
        'date': np.repeat(
            pd.date_range('2019.08.01', periods=10, freq='D'), repeats),
        'tsymbol': np.repeat(
            ['a', 'b', 'c', 'd', 'e', 'QWW', 'FEA', 'FFW', 'DER', 'POD'],
            repeats),
        # Five False values followed by five True values, each repeated.
        'tbool': np.repeat(
            np.repeat(np.arange(2, dtype='bool'), 5), repeats),
        'tchar': np.repeat(np.arange(1, 11, 1, dtype='int8'), repeats),
        'tshort': np.repeat(np.arange(1, 11, 1, dtype='int16'), repeats),
        'tint': np.repeat(np.arange(1, 11, 1, dtype='int32'), repeats),
        'tlong': np.repeat(np.arange(1, 11, 1, dtype='int64'), repeats),
        'tfloat': np.repeat(np.arange(1, 11, 1, dtype='float32'), repeats),
        'tdouble': np.repeat(np.arange(1, 11, 1, dtype='float64'), repeats),
    })
    # Helper defined elsewhere in the file.
    FromDataframe.odfs = _create_odf_pandas(n, FromDataframe.pdf)
def setUpClass(cls):
    """Connect orca and load ``groupbyDate.csv`` into pandas and a DFS table.

    Populates ``Csv.pdf_csv`` (pandas) and ``Csv.odfs_csv`` (returned by
    ``_create_odf_csv``), both indexed by the ``id`` column.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    DATA_DIR = path.abspath(path.join(__file__, "../setup/data"))
    fileName = 'groupbyDate.csv'
    data = os.path.join(DATA_DIR, fileName)
    data = data.replace('\\', '/')
    dfsDatabase = "dfs://groupbyDateDB"

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # Builtin ``bool`` replaces ``np.bool``: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    Csv.pdf_csv = pd.read_csv(
        data,
        parse_dates=[1],
        dtype={
            "id": np.int32,
            "tbool": bool,
            "tshort": np.int16,
            "tint": np.int32,
            "tlong": np.int64,
            "tfloat": np.float32,
            "tdouble": np.float64,
        },
    )
    Csv.pdf_csv['tbool'] = Csv.pdf_csv["tbool"].astype(bool)

    # Helper defined elsewhere in the file.
    Csv.odfs_csv = _create_odf_csv(data, dfsDatabase)

    # Index both frames by ``id``.
    Csv.odfs_csv.set_index("id", inplace=True)
    Csv.pdf_csv.set_index("id", inplace=True)
def setUpClass(cls):
    """Connect orca to the DolphinDB server at ``HOST``:``PORT``.

    The data-directory locals that used to be computed here were never
    read, so they have been removed; the fixture only opens the connection.
    """
    orca.connect(HOST, PORT, "admin", "123456")
def setUpClass(cls):
    """Connect orca and load ``onlyNumericalColumns.csv`` with both readers.

    The pandas frame goes to ``Csv.pdf_csv`` and the orca frame to
    ``Csv.odf_csv``; neither reader is given explicit dtypes.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    csv_path = os.path.join(
        data_dir, 'onlyNumericalColumns.csv').replace('\\', '/')

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    Csv.pdf_csv = pd.read_csv(csv_path)
    Csv.odf_csv = orca.read_csv(csv_path)
def setUpClass(cls):
    """Connect orca and load ``USPricesSample.csv`` into pandas and a DFS table.

    ``Csv.pdf_csv`` holds the pandas frame; ``Csv.odfs_csv`` holds whatever
    ``_create_odf_csv`` returns for the ``dfs://USPricesDB`` database.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    csv_path = os.path.join(data_dir, 'USPricesSample.csv').replace('\\', '/')
    dfs_db = "dfs://USPricesDB"

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    Csv.pdf_csv = pd.read_csv(csv_path)
    # Helper defined elsewhere in the file.
    Csv.odfs_csv = _create_odf_csv(csv_path, dfs_db)
def setUpClass(cls):
    """Connect orca and load ``USPricesSample.csv`` indexed by ``date``.

    ``Csv.odf_csv`` (orca) and ``Csv.pdf_csv`` (pandas) are both read from
    the same file and then re-indexed in place on the ``date`` column.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    csv_path = os.path.join(data_dir, 'USPricesSample.csv').replace('\\', '/')

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # orca side
    Csv.odf_csv = orca.read_csv(csv_path)
    Csv.odf_csv.set_index('date', inplace=True)

    # pandas side
    Csv.pdf_csv = pd.read_csv(csv_path)
    Csv.pdf_csv.set_index('date', inplace=True)
def setUpClass(cls):
    """Connect orca and load ``USPricesSample.csv`` without the ``DLRET`` column.

    ``Csv.odf_csv`` (orca) is read with float32 dtypes for ``DLSTCD`` and
    ``DLPRC``; ``Csv.pdf_csv`` (pandas) is read with default dtypes. The
    ``DLRET`` column is then dropped from both.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    csv_path = os.path.join(data_dir, 'USPricesSample.csv').replace('\\', '/')

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    float_columns = {"DLSTCD": np.float32, "DLPRC": np.float32}
    Csv.odf_csv = orca.read_csv(csv_path, dtype=float_columns)
    Csv.pdf_csv = pd.read_csv(csv_path)

    # The orca frame is rebound to the result of drop(); the pandas frame
    # is modified in place.
    Csv.odf_csv = Csv.odf_csv.drop(columns=['DLRET'])
    Csv.pdf_csv.drop(columns=['DLRET'], inplace=True)
def setUpClass(cls):
    """Connect orca and load ``onlyNumericalColumns.csv`` with explicit dtypes.

    ``Csv.pdf_csv`` holds the pandas frame; ``Csv.odfs_csv`` holds whatever
    ``_create_odf_csv`` returns for the ``dfs://onlyNumericalColumnsDB``
    database.
    """
    # The sample lives in setup/data relative to this file; use forward slashes.
    DATA_DIR = path.abspath(path.join(__file__, "../setup/data"))
    fileName = 'onlyNumericalColumns.csv'
    data = os.path.join(DATA_DIR, fileName)
    data = data.replace('\\', '/')
    dfsDatabase = "dfs://onlyNumericalColumnsDB"

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # Builtin ``bool`` replaces ``np.bool``: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    Csv.pdf_csv = pd.read_csv(
        data,
        dtype={
            "tbool": bool,
            "tchar": np.int8,
            "tshort": np.int16,
            "tint": np.int32,
            "tlong": np.int64,
            "tfloat": np.float32,
            "tdouble": np.float64,
        },
    )
    # Helper defined elsewhere in the file.
    Csv.odfs_csv = _create_odf_csv(data, dfsDatabase)
def setUpClass(cls):
    """Connect orca and load the left/right merge tables.

    Each side is stored on ``Csv`` three times: read from CSV through orca
    (``odf_csv_*``), read from the ``dfs://testMergeDB`` database through
    orca (``odfs_csv_*``), and read from CSV through pandas (``pdf_csv_*``).
    """
    # Both CSVs live in setup/data relative to this file; use forward slashes.
    data_dir = path.abspath(path.join(__file__, "../setup/data"))
    left_path = os.path.join(
        data_dir, 'test_merge_left_table.csv').replace('\\', '/')
    right_path = os.path.join(
        data_dir, 'test_merge_right_table.csv').replace('\\', '/')
    dfs_db = "dfs://testMergeDB"

    # Connect orca to the DolphinDB server.
    orca.connect(HOST, PORT, "admin", "123456")

    # Helper defined elsewhere; presumably it creates the 'tickers' and
    # 'values' tables read below -- confirm against its definition.
    _create_odf_csv(left_path, right_path)

    # Left table; pandas parses columns 0 and 1 as dates.
    Csv.odf_csv_left = orca.read_csv(left_path)
    Csv.odfs_csv_left = orca.read_table(dfs_db, 'tickers')
    Csv.pdf_csv_left = pd.read_csv(left_path, parse_dates=[0,1])

    # Right table.
    Csv.odf_csv_right = orca.read_csv(right_path)
    Csv.odfs_csv_right = orca.read_table(dfs_db, 'values')
    Csv.pdf_csv_right = pd.read_csv(right_path)
import time
import os
import orca
import os.path as path
from setup.settings import *
import csv

# Benchmark script: time how long orca takes to import USPrices.csv and
# start a new section in report.csv.

# Data directory: setup/data relative to this file.
DATA_DIR = path.abspath(path.join(__file__, "../setup/data"))
PRECISION_POINT = 1

fileName = 'USPrices.csv'
data = os.path.join(DATA_DIR, fileName)
data = data.replace('\\', '/')

# Connect orca to the DolphinDB server.
orca.connect(HOST, PORT, "admin", "123456")

# Import the CSV through orca and report the elapsed wall-clock time.
startTime = time.time()
odf = orca.read_csv(data)
print("Orca spent " + str(time.time() - startTime) + "s importing '" + fileName + "'")

# Append to report.csv. The file is opened with newline='' as the csv
# module requires; without it, platforms that translate '\n' to '\r\n'
# (Windows) get an extra blank line after every row.
# NOTE(review): the handle is intentionally left open because further rows
# may be written through ``writer`` after this point -- close ``csvfile``
# once the last row has been written.
csvfile = open(WORK_DIR + 'report.csv', 'a', newline='')
writer = csv.writer(csvfile)
# First row: timestamp of this run; second row: column headers.
writer.writerow(
    [time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))])
writer.writerow(['groupby', 'orca'])
import orca
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Location of the US stocks CSV file.
US = 'C:/DolphinDB/Orca/databases/USstocks.csv'

# Connect orca to the DolphinDB server on localhost:8848.
orca.connect('localhost', 8848)


def load_price_data(df):
    """Filter and enrich the raw price frame ``df``.

    Keeps rows whose ``date`` weekday is between 0 and 4 and whose ``PRC``
    and ``VOL`` are not null, restricted to the columns PERMNO, date, PRC,
    VOL, RET and SHROUT and sorted by (PERMNO, date). Adds the columns
    ``MV``, ``cumretIndex`` and ``signal`` and returns the resulting frame.

    NOTE(review): passing several comma-separated conditions to ``df[...]``
    and ``groupby(..., lazy=True)`` are assumed to be orca-specific
    extensions -- confirm against the orca documentation.
    """
    # Row filter (weekday in 0..4, PRC and VOL present), column
    # selection, then ordering by security and date.
    USstocks = df[df.date.dt.weekday.between(0, 4), df.PRC.notnull(), df.VOL.notnull()][[
        'PERMNO', 'date', 'PRC', 'VOL', 'RET', 'SHROUT'
    ]].sort_values(by=['PERMNO', 'date'])
    # Replace PRC with its absolute value.
    USstocks['PRC'] = USstocks.PRC.abs()
    # MV is the product of SHROUT and PRC.
    USstocks['MV'] = USstocks.SHROUT * USstocks.PRC
    # Cumulative product of (RET + 1), grouped by PERMNO.
    USstocks['cumretIndex'] = (USstocks + 1)['RET'].groupby(
        'PERMNO', lazy=True).cumprod()
    # Ratio of cumretIndex shifted by 21 rows to cumretIndex shifted by 252
    # rows, minus 1, evaluated through a lazy PERMNO groupby.
    # NOTE(review): presumably the shifts are applied within each PERMNO
    # group by the lazy groupby/transform -- confirm.
    USstocks['signal'] = (USstocks.shift(21) / USstocks.shift(252) -
                          1).groupby('PERMNO', lazy=True)['cumretIndex'].transform()
    return USstocks


def gen_trade_tables(df):
    """Select tradable rows from ``df`` and order them by date.

    NOTE(review): no ``return`` is visible and the function name suggests
    further steps; the body may continue beyond the text shown here.
    """
    # Keep rows with PRC > 5, MV > 100000, VOL > 0 and a non-null signal.
    USstocks = df[(df.PRC > 5), (df.MV > 100000), (df.VOL > 0), (df.signal.notnull())]
    # Narrow to the four columns used downstream and sort by date.
    USstocks = USstocks[['date', 'PERMNO', 'MV', 'signal']].sort_values(by='date')
def setUpClass(cls):
    """Connect orca to the DolphinDB server at ``HOST``:``PORT``."""
    user, password = "admin", "123456"
    orca.connect(HOST, PORT, user, password)