Example 1
# Assumed imports for all the examples below; bbg (Bloomberg wrappers),
# tomongo, query_all, query_part, and BDH_override are project-local modules
# that are not shown in these snippets.
import copy
import datetime
import os
from datetime import date, timedelta

import pandas as pd


def main(category, tickers, collection_name, download=False, upload=False):
    # set the path to store downloaded data by update date
    update_date = datetime.date.today().strftime('%Y%m%d')
    path = os.path.join(
        "//paicdom/paamhk/Aurelius/Data/DataDownloading/DataUpdate/data",
        category, update_date)
    if not os.path.isdir(path):
        os.makedirs(path)

    if download:
        # download adjustment factors from Bloomberg and store them in one CSV
        adj_normal, invalidSec = bbg.BBGFormula(tickers).getAdjFactor(
            adj_type='NORMAL_CASH')
        adj_abnormal, invalidSec = bbg.BBGFormula(tickers).getAdjFactor(
            adj_type='ABNORMAL_CASH')
        adj_split, invalidSec = bbg.BBGFormula(tickers).getAdjFactor(
            adj_type='CAPITAL_CHANGE')
        # DataFrame.append was removed in pandas 2.0; concatenate once instead
        adj = pd.concat([adj_split, adj_abnormal, adj_normal],
                        ignore_index=True)
        adj.insert(len(adj.columns), "update_date", update_date, True)
        # mode='a' appends across reruns on the same day; note that the header
        # row is repeated on each append
        adj.to_csv(os.path.join(path, 'AF.csv'), mode='a', index=False)

    if upload:
        # upload the downloaded CSV; if the download step was skipped in this
        # run, read the factors back from disk so `adj` is defined
        if not download:
            adj = pd.read_csv(os.path.join(path, 'AF.csv'))
        data_mongo = copy.deepcopy(adj)
        # store dates as integer Unix seconds before upload
        data_mongo['date'] = data_mongo['date'].apply(
            lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
        # (a previous revision checked the CSV length and skipped the upload
        # when there was no new data)
        tomongo(data_mongo,
                collection_name=collection_name,
                to_mongo=False,
                replace=False,
                db_name='MarketDataUpdate')
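
Every example stores the 'date' column as integer Unix seconds before upload
and reads it back with unit='s'. A minimal, self-contained sketch of that
round trip (pandas only; the values are illustrative):

import pandas as pd

frame = pd.DataFrame({'date': ['2019-06-27', '2019-06-28'],
                      'value': [1.0, 2.0]})
# Timestamp.value is nanoseconds since the epoch; floor-divide to seconds
frame['date'] = frame['date'].apply(
    lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
print(frame['date'].tolist())  # [1561593600, 1561680000]

# reading back, plus the one-day offset the examples use to pick the next
# startDate without duplicating the last stored day
latest = pd.to_datetime(frame['date'].max(), unit='s')
startDate = (latest + pd.Timedelta('1 days')).strftime('%Y%m%d')
print(startDate)  # '20190629'
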
Example 2

def main(category,
         endDate,
         fields,
         fields_new,
         fx_list,
         expiry_list,
         expiry_num,
         option_type,
         collection_name,
         download=False,
         upload=False):
    # set the path to store downloaded data by update date
    update_date = datetime.date.today().strftime('%Y%m%d')
    path = os.path.join(
        "//paicdom/paamhk/Aurelius/Data/DataDownloading/DataUpdate/data",
        category, update_date)
    if not os.path.isdir(path):
        os.makedirs(path)
    # read data out from MongoDB to get tickers and startDate
    # (a previous revision queried db_name='MarketData' instead)
    data = query_all(db_name='MarketDataUpdate',
                     collection_name=collection_name)
    # TODO: check that all the tickers share the same startDate to update from
    # chuchu: that check lives in findMInDateFXVOL.py
    # download from startDate + 1 day so the update does not duplicate the
    # last stored day
    startDate = data['date'].max()
    startDate = pd.to_datetime(startDate, unit='s') + pd.Timedelta('1 days')
    startDate = startDate.strftime('%Y%m%d')

    # Lucas: alternatively, start one week before today:
    # startDate = datetime.datetime.strftime(date.today() - timedelta(8), '%Y%m%d')

    option_list = list(option_type.values())

    # tickers were previously generated in code from every fx/option/expiry
    # combination (fx + option + expiry + ' BGN Curncy', with a 'BLCS Curncy'
    # suffix for HKDCNH) into columns ticker/name/feature/expiry_str/tenor;
    # they are now read from a CSV below

    # override startDate with the range recorded in the day-range CSV; the
    # endDate column is read as text (assumed schema), so parse it and take
    # the latest date before adding the one-day offset
    old_data = pd.read_csv(
        '/home/pingan/PycharmProjects/test/FX_Vol/dayRangeMarketData.csv')
    startDate = (pd.to_datetime(old_data['endDate'].max()) +
                 pd.Timedelta('1 days')).strftime('%Y%m%d')

    tickers = pd.read_csv(
        '/home/pingan/PycharmProjects/test/FX_Vol/dayRangeMarketDataUpdate.csv'
    )

    ## get the earliest start date as the input startDate for getDataframe
    ticker_list = list(tickers['ticker'])
    if download:
        date = bbg.BBGFormula(ticker_list).getStartDate()
        date = date[date['start_date'] != 'error']
        date = date.reset_index(drop=True)
        date.start_date = date.start_date.apply(
            lambda x: pd.to_datetime(x, yearfirst=True).strftime('%Y%m%d'))

        ticker_list = list(date['ticker'])

        fdata, invalidSec = bbg.BBGFormula(
            ticker_list).multi_ticker_multi_fields_BDH(fields, startDate,
                                                       endDate)

        data_mongo = pd.merge(fdata, tickers, how='left', on='ticker')

        # map the Bloomberg field names onto the storage schema
        data_mongo.rename(columns=dict(zip(fields, fields_new)), inplace=True)

        # value is the mid price; a row-wise apply is used because the earlier
        # vectorized form 0.5*[ask + bid] built a Python list (Lucas)
        def f(x):
            return 0.5 * (x['ask'] + x['bid'])

        data_mongo['value'] = data_mongo.apply(f, axis=1)
        data_mongo['date'] = data_mongo['date'].apply(
            lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
        # `ticker` was undefined at this point in the original; name the file
        # by collection instead
        data_mongo.to_csv(os.path.join(path, collection_name + '.csv'),
                          index=False)

    if upload:
        # re-read the CSV when the download step was skipped in this run
        if not download:
            data_mongo = pd.read_csv(os.path.join(path,
                                                  collection_name + '.csv'))
        tomongo(data_mongo,
                collection_name=collection_name,
                to_mongo=True,
                replace=False,
                db_name='MarketDataUpdate')
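
The mid-price note above is worth unpacking: the commented-out form
0.5*[data_mongo['ask'] + data_mongo['bid']] fails because the square brackets
build a one-element Python list rather than a Series. A small sketch of the
row-wise version used above and its vectorized equivalent (illustrative
columns and values):

import pandas as pd

quotes = pd.DataFrame({'bid': [1.10, 1.20], 'ask': [1.12, 1.24]})
# row-wise, as in the example above
quotes['mid_apply'] = quotes.apply(lambda x: 0.5 * (x['ask'] + x['bid']),
                                   axis=1)
# vectorized over whole columns; same result, faster on large frames
quotes['mid_vec'] = 0.5 * (quotes['ask'] + quotes['bid'])
assert quotes['mid_apply'].equals(quotes['mid_vec'])
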
Example 3
def main(category,
         endDate,
         tickers,
         collection_name,
         download=False,
         upload=False):
    # set the path to store downloaded data by update date
    update_date = datetime.date.today().strftime('%Y%m%d')
    path = os.path.join(
        "//paicdom/paamhk/Aurelius/Data/DataDownloading/DataUpdate/data",
        category, update_date)
    if not os.path.isdir(path):
        os.makedirs(path)

    # expect exactly four tickers; the original only downloads the first
    assert len(tickers) == 4
    ticker = tickers[0]
    name = ticker[:ticker.find(" ")]
    startDate = "19000101"
    # `fields` was undefined in this snippet; assuming the OHLCV Bloomberg
    # fields implied by the column renames below
    fields = ['PX_LAST', 'PX_OPEN', 'PX_HIGH', 'PX_LOW', 'PX_VOLUME']
    if download:
        # download data from Bloomberg and store each ticker's data as a CSV

        dataTFF, invalidSec = bbg.BBGFormula(ticker).adjust_sigle_ticker_BDH(
            fields,
            startDate,
            endDate,
            split=True,
            adjustNormal=False,
            adjustAbnormal=False)
        dataTFF.rename(columns={
            'PX_LAST': 'close_split_only',
            'PX_OPEN': 'open_split_only',
            'PX_HIGH': 'high_split_only',
            'PX_LOW': 'low_split_only',
            'PX_VOLUME': 'volume_split'
        },
                       inplace=True)
        # volume is only affected by split adjustments, so the fully adjusted
        # download can skip PX_VOLUME
        fields.remove("PX_VOLUME")
        dataTTT, invalidSec = bbg.BBGFormula(ticker).adjust_sigle_ticker_BDH(
            fields,
            startDate,
            endDate,
            split=True,
            adjustNormal=True,
            adjustAbnormal=True)
        dataTTT.rename(columns={
            'PX_LAST': 'close_threeAdj',
            'PX_OPEN': 'open_threeAdj',
            'PX_HIGH': 'high_threeAdj',
            'PX_LOW': 'low_threeAdj'
        },
                       inplace=True)

        dataTTT = dataTTT.drop(columns=['ticker'])
        dataa = pd.merge(dataTFF, dataTTT, on='date', how='outer')
        dataa.to_csv(os.path.join(path, ticker + '.csv'), index=False)
    # 1) build the upload frame from every downloaded CSV of adjusted price
    # and volume; this runs outside the download branch so the upload can be
    # rerun against an existing directory
    parts = []
    for file in os.listdir(path):
        dataone = pd.read_csv(os.path.join(path, file))
        dataone['date'] = dataone['date'].apply(
            lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
        # melt each value column into (date, ticker, value) rows plus metadata
        for col in list(set(dataone.columns) - set(['date', 'ticker'])):
            part = dataone[['date', 'ticker', col]].copy()
            part = part.dropna(axis=0, how='any')
            part.rename(columns={col: 'value'}, inplace=True)
            part.insert(len(part.columns), 'name', file[:file.find(" ")],
                        True)
            part.insert(len(part.columns), 'tenor', 'spot', True)
            if col.find("split") < 0:
                part.insert(len(part.columns), 'adjust', 'all', True)
            else:
                part.insert(len(part.columns), 'adjust', 'split', True)
            part.insert(len(part.columns), 'feature', col[:col.find("_")],
                        True)
            parts.append(part)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    data_mongo = pd.concat(parts, ignore_index=True)

    assert len(set(data_mongo['name'])) == len(os.listdir(path))

    # 2) upload the updated rows to 'MarketDataUpdate'
    if upload:
        tomongo(data_mongo,
                collection_name=collection_name,
                to_mongo=False,
                replace=False,
                db_name='MarketDataUpdate')
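
Examples 3 and 4 reshape each wide CSV into tall (date, ticker, feature,
value) rows one column at a time. pandas.melt performs the same reshape in a
single call; a minimal sketch with made-up columns that follow the same
naming scheme:

import pandas as pd

wide = pd.DataFrame({
    'date': [1561593600, 1561680000],
    'ticker': ['SPX Index', 'SPX Index'],
    'close_threeAdj': [100.0, 101.5],
    'volume_split': [3.2e9, 3.4e9],
})
tall = wide.melt(id_vars=['date', 'ticker'],
                 var_name='column', value_name='value').dropna()
# recover the metadata that the column names encode
tall['feature'] = tall['column'].str.split('_').str[0]
tall['adjust'] = tall['column'].apply(
    lambda c: 'split' if 'split' in c else 'all')
tall = tall.drop(columns=['column'])
print(tall)
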
Example 4

def main(category,
         endDate,
         fields,
         tickers,
         download=False,
         upload=False,
         change=False,
         collection_name='Equity'):
    # set the path to store downloaded data by update date
    update_date = date.today().strftime('%Y%m%d')
    path = os.path.join(
        '//paicdom/paamhk/Aurelius/Data/DataDownloading/DataUpdate/data',
        category, update_date)
    if not os.path.isdir(path):
        os.makedirs(path)
    
    # read data out from MongoDB to get tickers and startDate
    # For index data the tenor is 'spot'; for index futures it is 'future'
    # data = query_all(db_name='MarketData', collection_name='Equity', tenor='spot')
    # Lucas: download from startDate + 1 day so the last stored day is not
    # duplicated:
    # startDate = data['date'].max()
    # startDate = pd.to_datetime(startDate, unit='s') + pd.Timedelta('1 days')
    # startDate = startDate.strftime('%Y%m%d')
    startDate = '20190627'

    
    if download:
        # download data from Bloomberg and store each ticker's data as a CSV
        for ticker in tickers:
            data_down, invalidSec = bbg.BBGFormula(
                ticker).single_ticker_multi_fields_BDH(fields, startDate,
                                                       endDate)
            data_down.to_csv(os.path.join(path, ticker + '.csv'), index=False)
    
    if change:
        # 1) build upload rows from the downloaded CSVs of unadjusted prices
        # and dividends

        old = ['PX_LAST', 'PX_OPEN', 'PX_HIGH', 'PX_LOW', 'PE_RATIO',
               'BEST_PE_RATIO', 'PX_TO_BOOK_RATIO', 'BEST_PX_BPS_RATIO',
               'DVD_SH_LAST']
        new = ['close', 'open', 'high', 'low', 'PE', 'PE_E', 'PB', 'PB_E',
               'div']

        parts = []
        for file in os.listdir(path):
            dataone = pd.read_csv(os.path.join(path, file))
            dataone.rename(columns=dict(zip(old, new)), inplace=True)
            dataone['date'] = dataone['date'].apply(
                lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
            for col in list(set(dataone.columns) - set(['date', 'ticker'])):
                part = dataone[['date', 'ticker', col]].copy()
                part = part.dropna(axis=0, how='any')
                part.rename(columns={col: 'value'}, inplace=True)
                part.insert(3, 'name', file[:file.find(" ")], True)
                part.insert(3, 'tenor', 'spot', True)
                # DAX Index and IBOV Index are already total-return indices
                if file[:file.find(" ")] in ["DAX", "IBOV"]:
                    part.insert(3, 'adjust', 'all', True)
                else:
                    part.insert(3, 'adjust', 'no', True)
                part.insert(3, 'feature', col, True)
                parts.append(part)
        # DataFrame.append was removed in pandas 2.0; concatenate once instead
        data_mongo = pd.concat(parts, ignore_index=True)
        
        assert len(set(data_mongo['ticker'])) == len(os.listdir(path))

        # 1) upload the updated, unadjusted prices to 'MarketDataUpdate'
        if upload:
            tomongo(data_mongo, collection_name=collection_name,
                    to_mongo=True, db_name='MarketDataUpdate')
        
        # 2) read from the update database and compute adjusted prices
        for file in os.listdir(path):
            # for indices, name is "SPX" when the filename is "SPX Index.csv"
            name = file[:file.find(" ")]
            # DAX and IBOV are already total-return indices, so no adjusted
            # prices need to be computed for them
            if name in ["DAX", "IBOV"]:
                continue
            data_old = query_part(name=name, tenor='spot',
                                  collection_name=collection_name,
                                  db_name='MarketData', adjust='no')
            data_new = query_part(name=name, tenor='spot',
                                  collection_name=collection_name,
                                  db_name='MarketDataUpdate', adjust='no')
            # the old dividend history is needed to compute new adjusted prices
            data_all = pd.concat([data_old, data_new], ignore_index=True)
            data_all.drop_duplicates(inplace=True)

            # pivot the (feature, value) rows into one column per feature,
            # indexed by date
            df = pd.DataFrame()
            for feature, temp_df in data_all.groupby("feature"):
                if feature not in ['close', 'open', 'high', 'low', 'div']:
                    continue
                temp_df = temp_df[['date', 'value']].rename(
                    columns={'value': feature})
                temp_df['date'] = pd.to_datetime(temp_df['date'], unit='s')
                temp_df.set_index('date', drop=True, inplace=True)
                df = df.join(temp_df, how='outer')

            df.sort_index(inplace=True)
            df.dropna(subset=['close', 'open', 'high', 'low'], how='all',
                      inplace=True)
            df.reset_index(inplace=True)
            
            # dividend adjustment: on ex-dividend days the factor is
            # div/price + 1; elsewhere div is NaN, so treat missing dividends
            # as zero to keep the cumulative product defined
            for price in ['close', 'open', 'high', 'low']:
                df['adj'] = df['div'].fillna(0) / df[price] + 1
                df['factor'] = df['adj'].cumprod()
                df[price + "_adj"] = df[price] * df['factor']
            
            # 3) build the adjusted-price rows for upload
            df['date'] = df['date'].apply(lambda x: int(x.value // 1e9))
            parts_n = []
            for col in ['close_adj', 'open_adj', 'high_adj', 'low_adj']:
                part = df[['date', col]].copy()
                part = part.dropna(axis=0, how='any')
                part.rename(columns={col: 'value'}, inplace=True)
                part.insert(2, 'name', name, True)
                part.insert(2, 'ticker', name + " Index", True)
                part.insert(2, 'tenor', 'spot', True)
                part.insert(2, 'adjust', 'all', True)
                part.insert(2, 'feature', col[:col.find("_")], True)
                parts_n.append(part)
            data_mongo_n = pd.concat(parts_n, ignore_index=True)
            
            if upload:
                # only upload rows newer than what the update database
                # already holds
                data_mongo_n = data_mongo_n.loc[
                    data_mongo_n['date'] >= data_new['date'].min()]
                tomongo(data_mongo_n, collection_name=collection_name,
                        to_mongo=True, db_name='MarketDataUpdate')
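
The dividend adjustment in the loop above compounds a per-day factor of
div/price + 1 into a cumulative multiplier. A self-contained sketch with
made-up prices and a single dividend, showing that the factor stays at 1.0
until the ex-dividend day and then scales every later close:

import pandas as pd

df = pd.DataFrame({
    'close': [100.0, 102.0, 101.0, 103.0],
    'div':   [None, None, 2.0, None],  # one 2.0 dividend on the third day
})
df['adj'] = df['div'].fillna(0) / df['close'] + 1
df['factor'] = df['adj'].cumprod()
df['close_adj'] = df['close'] * df['factor']
print(df[['close', 'factor', 'close_adj']])
# factor: 1.0, 1.0, ~1.0198, ~1.0198 -> later closes include the dividend
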
Example 5

def main(category,
         endDate,
         ticker,
         fields,
         tenor_list,
         moneyness_list,
         collection_name,
         download=False,
         upload=False):
    # set the path to store downloaded data by update date
    update_date = date.today().strftime('%Y%m%d')
    path = os.path.join(
        "//paicdom/paamhk/Aurelius/Data/DataDownloading/DataUpdate/data",
        category, update_date)
    if not os.path.isdir(path):
        os.makedirs(path)
    # read data out from MongoDB to get tickers and startDate
    # data = query_all(db_name='MarketDataUpdate', collection_name=collection_name)
    # TODO: check that all the tickers share the same startDate to update from
    # Lucas: downloading from startDate + 1 day avoids duplicating the last
    # stored day:
    # startDate = data['date'].max()
    # startDate = pd.to_datetime(startDate, unit='s') + pd.Timedelta('1 days')
    # startDate = startDate.strftime('%Y%m%d')
    startDate = '20190627'

    # Lucas: alternatively, start one week before today:
    # startDate = datetime.datetime.strftime(date.today() - timedelta(8), '%Y%m%d')

    if download:
        # override_1 and override_2 were undefined in this snippet; they are
        # presumed module-level constants naming the Bloomberg override fields
        # for maturity and moneyness
        parts = []
        for terms in tenor_list:
            for moneyness in moneyness_list:
                print(terms, moneyness)
                value1 = 'MATURITY_' + terms
                value2 = "MONEY_LVL_" + moneyness + "_0"
                data, inv = BDH_override(ticker, fields, override_1, value1,
                                         override_2, value2, startDate,
                                         endDate)
                if not data.empty:
                    data.rename(columns={fields[0]: 'value'}, inplace=True)
                    data['tenor'] = terms
                    data['feature'] = moneyness
                    parts.append(data)
        # DataFrame.append was removed in pandas 2.0; concatenate once instead
        data_mongo = pd.concat(parts, ignore_index=True)

        # store dates as Unix seconds and tenors as integer day counts
        data_mongo['date'] = data_mongo['date'].apply(
            lambda x: int(pd.to_datetime(x, yearfirst=True).value // 1e9))
        data_mongo['tenor'] = data_mongo['tenor'].apply(
            lambda x: int(x[:x.find('D')]))
        # keep only the name part of the ticker, e.g. 'NKY Index' -> 'NKY'
        data_mongo['ticker'] = data_mongo['ticker'].apply(
            lambda x: x[:x.find(" ")])
        data_mongo.rename(columns={'ticker': 'name'}, inplace=True)
        data_mongo.to_csv(os.path.join(path,
                                       ticker + collection_name + '.csv'),
                          index=False)
    if upload:
        # re-read the CSV when the download step was skipped in this run
        if not download:
            data_mongo = pd.read_csv(
                os.path.join(path, ticker + collection_name + '.csv'))
        tomongo(data_mongo,
                collection_name='Equity_Vol',
                to_mongo=True,
                replace=False,
                db_name='MarketDataUpdate')
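
The tenor conversion above assumes every tenor string looks like '30D'. A
slightly more defensive, pure-Python sketch (the helper name is illustrative)
that rejects malformed inputs instead of silently truncating them:

import re

def tenor_days(tenor):
    """Parse a tenor such as '30D' into an integer day count."""
    match = re.fullmatch(r'(\d+)D', tenor)
    if match is None:
        raise ValueError('unexpected tenor format: %r' % tenor)
    return int(match.group(1))

print([tenor_days(t) for t in ['30D', '60D', '90D']])  # [30, 60, 90]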