def generate_market_data_for_tests():
    """Download and dump the market data files used by the test suite.

    Writes two CSVs into the working directory: daily S&P500 closes
    sourced from Quandl and one day of EURUSD tick bids from DukasCopy.
    """
    # Daily S&P500 close prices via Quandl (Yahoo-sourced ticker)
    daily_request = MarketDataRequest(
        start_date='01 Jan 2001', finish_date='01 Dec 2008',
        tickers=['S&P500'], vendor_tickers=['YAHOO/INDEX_GSPC'],
        fields=['close'], data_source='quandl')

    daily_market = Market(market_data_generator=MarketDataGenerator())
    daily_market.fetch_market(daily_request).to_csv("S&P500.csv")

    # One day of EURUSD tick bid quotes from DukasCopy (NYC cut)
    tick_request = MarketDataRequest(
        start_date='14 Jun 2016', finish_date='15 Jun 2016', cut='NYC',
        category='fx', fields=['bid'], freq='tick',
        data_source='dukascopy', tickers=['EURUSD'])

    tick_market = Market(market_data_generator=MarketDataGenerator())
    tick_market.fetch_market(tick_request).to_csv("EURUSD_tick.csv")
def generate_market_data_for_tests():
    """Generate the CSV fixtures consumed by the tests.

    NOTE(review): this file defines another function with the same name;
    at import time the later definition shadows the earlier one.
    """
    # Generate daily S&P500 data from Quandl
    request_daily = MarketDataRequest(start_date="01 Jan 2001",
                                      finish_date="01 Dec 2008",
                                      tickers=["S&P500"],
                                      vendor_tickers=["YAHOO/INDEX_GSPC"],
                                      fields=["close"],
                                      data_source="quandl")

    market = Market(market_data_generator=MarketDataGenerator())
    frame = market.fetch_market(request_daily)
    frame.to_csv("S&P500.csv")

    # Generate tick data from DukasCopy for EURUSD
    request_tick = MarketDataRequest(start_date="14 Jun 2016",
                                     finish_date="15 Jun 2016",
                                     cut="NYC", category="fx",
                                     fields=["bid"], freq="tick",
                                     data_source="dukascopy",
                                     tickers=["EURUSD"])

    market = Market(market_data_generator=MarketDataGenerator())
    frame = market.fetch_market(request_tick)
    frame.to_csv("EURUSD_tick.csv")
def __init__(self, market_data_generator = None):
    """Set up logging, load the econ ticker/country mapping CSVs and
    choose a market data generator (a fresh ``MarketDataGenerator``
    when the caller does not supply one)."""
    self.logger = LoggerManager().getLogger(__name__)

    data_constants = DataConstants()
    self._all_econ_tickers = pandas.read_csv(data_constants.all_econ_tickers)
    self._econ_country_codes = pandas.read_csv(data_constants.econ_country_codes)
    self._econ_country_groups = pandas.read_csv(data_constants.econ_country_groups)

    # Fall back to the default generator only when none was injected
    if market_data_generator is None:
        market_data_generator = MarketDataGenerator()

    self.market_data_generator = market_data_generator
def __init__(self, engine='plotly', data_source='bloomberg',
             market_data_generator=None):
    """Initialise the chart/market helper.

    :param engine: chartpy plotting engine (default 'plotly')
    :param data_source: default market data source (default 'bloomberg')
    :param market_data_generator: generator used by Market; a fresh
        MarketDataGenerator is created per instance when omitted

    Fix: the original default ``market_data_generator=MarketDataGenerator()``
    was evaluated once at definition time, so every instance silently
    shared the same generator object (mutable-default pitfall). Using a
    ``None`` sentinel keeps the call signature backward-compatible while
    giving each instance its own generator.
    """
    self._chart = Chart(engine=engine)

    if market_data_generator is None:
        market_data_generator = MarketDataGenerator()

    self._market = Market(market_data_generator=market_data_generator)
    self._data_source = data_source
def plot_animated_vol_market():
    """Fetch the EURUSD implied-vol surface from Bloomberg and plot it:
    a static Plotly surface for the first date, then an animated surface
    across all dates.

    Fix: the original issued the identical animated ``Chart(...).plot``
    call twice in a row (the trailing call duplicated the one above it);
    the redundant second call has been removed.
    """
    market = Market(market_data_generator=MarketDataGenerator())

    cross = ['EURUSD']
    start_date = '01 Mar 2017'
    finish_date = '21 Apr 2017'
    sampling = 'no'  # 'no' = keep raw dates, otherwise a resample rule

    md_request = MarketDataRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   data_source='bloomberg', cut='NYC',
                                   category='fx-implied-vol',
                                   tickers=cross,
                                   cache_algo='cache_algo_return')

    df = market.fetch_market(md_request)

    if sampling != 'no':
        df = df.resample(sampling).mean()

    fxvf = FXVolFactory()

    # Grab the vol surface for each date and create a dataframe for each
    # date (could have used a panel)
    df_vs = []
    for i in range(0, len(df.index)):
        df_vs.append(fxvf.extract_vol_surface_for_date(df, cross[0], i))

    # Do static plot for first day using Plotly
    style = Style(title="FX vol surface of " + cross[0], source="chartpy",
                  color='Blues')
    Chart(df=df_vs[0], chart_type='surface', style=style).plot(
        engine='plotly')

    # Now do animation (TODO: need to fix animation in chartpy for
    # matplotlib)
    style = Style(title="FX vol surface of " + cross[0], source="chartpy",
                  color='Blues', animate_figure=True,
                  animate_titles=df.index, animate_frame_ms=500,
                  normalize_colormap=False)

    # Chart object is initialised with the dataframe and our chart style
    Chart(df=df_vs, chart_type='surface', style=style).plot(engine='plotly')
def test_huobi():
    """Huobi downloads should yield non-empty daily and tick frames."""
    market = Market(market_data_generator=MarketDataGenerator())

    # Daily historical data
    daily_request = MarketDataRequest(
        start_date='11 Apr 2018', finish_date='20 Apr 2018', cut='LOC',
        freq='daily', data_source='huobi', category='crypto',
        fields=['high', 'low'], tickers=['XBTUSD'])

    assert not market.fetch_market(daily_request).empty

    # Historical tick (second) data: a 5-minute window ending 1 minute ago
    finish_dt = dt.datetime.utcnow() - dt.timedelta(minutes=1)
    start_dt = finish_dt - dt.timedelta(minutes=5)

    tick_request = MarketDataRequest(
        start_date=start_dt, finish_date=finish_dt, cut='LOC',
        freq='tick', data_source='huobi', category='crypto',
        fields=['high', 'low'], tickers=['XBTUSD'])

    assert not market.fetch_market(tick_request).empty
def process_batch(start, end, symbol):
    """Download tick bid/ask quotes for *symbol* over [start, end] from
    DukasCopy and bulk-insert the rows into the 'prices-data' Mongo
    database (collection ``<symbol>_ticks``).

    Returns the pymongo ``insert_many`` result.
    """
    market = Market(market_data_generator=MarketDataGenerator())

    request = MarketDataRequest(start_date=start, finish_date=end,
                                category='fx', fields=['bid', 'ask'],
                                freq='tick', data_source='dukascopy',
                                tickers=[symbol.upper()])

    df = market.fetch_market(request)
    print(df.tail(n=5))

    # Promote the timestamp index into a column Mongo can use as _id
    df.reset_index(level=0, inplace=True)
    df.columns = ['_id','bid','ask']
    print(df.tail(n=5))

    records = df.to_dict('records')
    print("converted to dict")

    # Build the connection URI, with credentials only when configured
    user = config.mongo['user']
    pasw = config.mongo['pasw']
    host = config.mongo['uri']

    if user == "" or pasw == "":
        mongouri = "mongodb://" + host + ":27017"
    else:
        mongouri = ("mongodb://" + user + ":" + pasw + "@" + host
                    + ":27017/admin")

    print(mongouri)

    client = MongoClient(mongouri)
    db = client['prices-data']
    # db = client['tests']
    collection = db[symbol.lower() + '_ticks']

    return collection.insert_many(records)
def update_graph(
        graph_id,
        graph_title,
        stock,
        ticker,
        yaxis_title,
        source,
):
    """Build a dcc.Graph for one stock/field combination.

    :param graph_id: ID for Dash callbacks
    :param graph_title: title displayed on the layout
    :param stock: stock symbol to plot
    :param ticker: field/column name to plot (e.g. 'close')
    :param yaxis_title: y-axis label
    :param source: 1 to fetch via findatapy/Yahoo, anything else = Quandl EOD
    :return: dcc.Graph object containing the updated figure

    Fixes: the original docstring documented parameters that do not exist
    in this signature (y_train_index, run_log_json, ...); dead
    ``data = None`` / ``stockString = None`` pre-assignments removed;
    ``if(source==1)`` rewritten idiomatically.
    """
    market = Market(market_data_generator=MarketDataGenerator())

    # Download equities data from Yahoo
    md_request = MarketDataRequest(
        start_date="decade",            # start date
        data_source='yahoo',            # use Yahoo as data source
        tickers=[stock, 'Citigroup'],   # ticker (findatapy)
        fields=[ticker],                # which fields to download
        vendor_tickers=[stock, 'c'],    # ticker (Yahoo)
        vendor_fields=[ticker])         # which vendor fields to download

    # SECURITY NOTE(review): hard-coded API key -- move to an environment
    # variable / secrets store and revoke this key.
    quandl.ApiConfig.api_key = "545Z2yCpQxyZ4WH9Q55E"

    if source == 1:
        data = market.fetch_market(md_request)
        series = data[stock + '.' + ticker]
    else:
        data = quandl.get("EOD/" + stock, start_date='2017-12-28',
                          end_date='2017-12-28')
        series = data

    scatter_trace = go.Scatter(
        x=data.index,
        y=series,
        name="Training",
        line=dict(color="rgb(54, 218, 170)"),
        showlegend=False,
    )

    layout = go.Layout(
        title=graph_title,
        margin=go.layout.Margin(l=50, r=50, b=50, t=50),
        yaxis={"title": yaxis_title},
    )

    figure = go.Figure(data=[scatter_trace], layout=layout)

    return dcc.Graph(figure=figure, id=graph_id)
def __init__(self, market_data_generator=None, md_request=None):
    # Resolve the generator implementation from configuration when the
    # caller does not inject one explicitly.
    if market_data_generator is None:
        if constants.default_market_data_generator == "marketdatagenerator":
            from findatapy.market import MarketDataGenerator
            market_data_generator = MarketDataGenerator()
        elif constants.default_market_data_generator == 'cachedmarketdatagenerator':
            # NOT CURRENTLY IMPLEMENTED FOR FUTURE USE
            # NOTE(review): 'finaddpy' looks like a typo (cf. 'findatapy');
            # confirm the package name before enabling this branch
            from finaddpy.market import CachedMarketDataGenerator
            market_data_generator = CachedMarketDataGenerator()
        else:
            # Fallback: any unrecognised setting gets the plain generator
            from findatapy.market import MarketDataGenerator
            market_data_generator = MarketDataGenerator()

    self.speed_cache = SpeedCache()
    self._market_data_generator = market_data_generator
    self._filter = Filter()
    self.md_request = md_request
def test_bitcoincharts():
    """Bitcoincharts tick history for XBTUSD (itBit) should not be empty."""
    request = MarketDataRequest(start_date='11 Nov 2015',
                                finish_date='02 Feb 2018', cut='LOC',
                                freq='tick', data_source='bitcoincharts',
                                category='crypto',
                                fields=['close', 'volume'],
                                tickers=['XBTUSD_itbit'])

    frame = Market(
        market_data_generator=MarketDataGenerator()).fetch_market(request)

    assert not frame.empty
def test_binance():
    """Binance daily history for WTCXBT should not be empty."""
    request = MarketDataRequest(
        start_date='18 Feb 2017', finish_date='20 Feb 2018', cut='LOC',
        freq='daily', data_source='binance', category='crypto',
        fields=['close', 'volume', 'quote-asset-volume'],
        tickers=['WTCXBT'])

    frame = Market(
        market_data_generator=MarketDataGenerator()).fetch_market(request)

    assert not frame.empty
def __init__(self):
    """Configure the trading model: market accessor, dump path, strategy
    name, scaling and plot engine, then load the backtest parameters.

    Fix: dropped the redundant bare ``return`` at the end of ``__init__``
    (an ``__init__`` must return None anyway).
    """
    super(TradingModel, self).__init__()

    ##### FILL IN WITH YOUR OWN PARAMETERS FOR display, dumping, TSF etc.
    self.market = Market(market_data_generator=MarketDataGenerator())
    self.DUMP_PATH = ''
    self.FINAL_STRATEGY = 'FX trend'
    self.SCALE_FACTOR = 1
    self.DEFAULT_PLOT_ENGINE = 'matplotlib'

    self.br = self.load_parameters()
def __init__(self, market_data_generator=None, md_request=None):
    """Store the market data generator (resolved from configuration when
    not injected) and the optional default request.

    Fix: previously, an unrecognised ``default_market_data_generator``
    setting fell through both branches and silently left
    ``self.market_data_generator`` as None; an ``else`` fallback to the
    plain MarketDataGenerator now matches the sibling implementation
    elsewhere in this file.
    """
    if market_data_generator is None:
        if DataConstants(
        ).default_market_data_generator == "marketdatagenerator":
            from findatapy.market import MarketDataGenerator
            market_data_generator = MarketDataGenerator()
        elif DataConstants(
        ).default_market_data_generator == 'cachedmarketdatagenerator':
            # NOT CURRENTLY IMPLEMENTED FOR FUTURE USE
            from finexpy.market import CachedMarketDataGenerator
            market_data_generator = CachedMarketDataGenerator()
        else:
            # Fallback instead of silently leaving the generator as None
            from findatapy.market import MarketDataGenerator
            market_data_generator = MarketDataGenerator()

    self.market_data_generator = market_data_generator
    self.md_request = md_request
def construct_backtest(ticker, vendor_ticker, sma_period, data_source,
                       start_date, quandl_api_key):
    """Run an SMA trend-following backtest on one asset.

    :param ticker: findatapy ticker(s) to trade
    :param vendor_ticker: vendor ticker(s) for the data source
    :param sma_period: simple-moving-average window
    :param data_source: findatapy data source name
    :param start_date: backtest/download start date
    :param quandl_api_key: API key passed to the market data request
    :return: (port, signals) -- cumulative portfolio returns (with a
        descriptive column label) and the portfolio signal DataFrame

    Fixes: removed a stray trailing semicolon; corrected the misleading
    comment on create_tech_ind.
    """
    backtest = Backtest()
    br = BacktestRequest()

    # Set all the parameters for the backtest
    br.start_date = start_date
    br.finish_date = datetime.datetime.utcnow()
    br.spot_tc_bp = 2.5  # 2.5 bps bid/ask spread
    br.ann_factor = 252

    tech_params = TechParams()
    tech_params.sma_period = sma_period
    indicator = 'SMA'

    md_request = MarketDataRequest(
        start_date=start_date,
        finish_date=datetime.date.today(),
        freq='daily',
        data_source=data_source,
        tickers=ticker,
        fields=['close'],
        vendor_tickers=vendor_ticker,
        quandl_api_key=quandl_api_key)

    market = Market(market_data_generator=MarketDataGenerator())

    # Download the market data (the asset we are trading is also
    # being used to generate the signal)
    asset_df = market.fetch_market(md_request)
    spot_df = asset_df

    # Use a technical indicator to create signals (we could create any
    # signal function, but finmarketpy has several built in)
    tech_ind = TechIndicator()
    tech_ind.create_tech_ind(spot_df, indicator, tech_params)

    # Use the same data for generating signals
    signal_df = tech_ind.get_signal()

    backtest.calculate_trading_PnL(br, asset_df, signal_df, None, False)

    # Get the returns and signals for the portfolio
    port = backtest.portfolio_cum()
    port.columns = [indicator + ' = ' + str(tech_params.sma_period) + ' '
                    + str(backtest.portfolio_pnl_desc()[0])]
    signals = backtest.portfolio_signal()

    # returns = backtest.pnl()

    return port, signals
def getFxData(self, startDate='14 Jun 2016', endDate='15 Jun 2016',
              tickers=['EURUSD'], fields=['close'], frequency='tick'):
    """Fetch market data for the given tickers/fields/frequency using
    this object's configured category and data source.

    NOTE(review): the list defaults are shared across calls (mutable
    default); safe here only because they are never mutated.
    """
    request = MarketDataRequest(start_date=startDate,
                                finish_date=endDate,
                                category=self.category,
                                fields=fields,
                                freq=frequency,
                                data_source=self.datasource,
                                tickers=tickers)

    return Market(
        market_data_generator=MarketDataGenerator()).fetch_market(request)
def load_minute_data(ticker, start_date='01 Jan 2019',
                     finish_date='30 Jun 2019'):
    """Return 1-minute mid prices for *ticker* (UTC timezone).

    Downloads DukasCopy tick data and caches it on disk as a 1-minute
    Parquet file the first time; subsequent calls read the cached file.
    """
    # Imports of various findatapy libraries for market data downloads
    from findatapy.market import Market, MarketDataRequest, MarketDataGenerator
    import os

    # By default the path is the working directory, but we can change that
    raw_data_path = ''

    # Define all the vendor fields, tickers etc. explicitly so we bypass
    # the configuration file
    md_request = MarketDataRequest(
        start_date=start_date, finish_date=finish_date,
        fields=['bid', 'ask'], vendor_fields=['bid', 'ask'],
        freq='tick', data_source='dukascopy',
        tickers=ticker, vendor_tickers=ticker, category='fx')

    market = Market(market_data_generator=MarketDataGenerator())

    compression_type = 'gzip'  # you can change this to 'snappy' if you want!

    cache_file = raw_data_path + ticker + '_1min.gzip'

    if os.path.exists(cache_file):
        # Edit the below line if you want to pick only one of the yearly
        # Parquet files -- loading the whole amount might run out of memory!
        df_minute = pd.read_parquet(cache_file, engine='fastparquet')
    else:
        # Not on disk yet: download (slow), resample and cache
        df_tick = market.fetch_market(md_request)
        df_tick['mid'] = (df_tick[ticker + '.bid']
                          + df_tick[ticker + '.ask']) / 2.0

        df_minute = pd.DataFrame(
            df_tick['mid'].resample("1min").first()).dropna()
        df_minute.to_parquet(cache_file, compression=compression_type,
                             engine='fastparquet')

    return df_minute
def generate_market_data_for_tests(start_date, finish_date):
    """Fetch EURUSD tick bid quotes from DukasCopy for the given window.

    :param start_date: download start date
    :param finish_date: download finish date
    :return: the tick DataFrame, or None if the download fails

    Fix: narrowed the bare ``except:`` (which also swallowed
    KeyboardInterrupt and SystemExit) to ``except Exception``.
    """
    md_request = MarketDataRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   cut='NYC', category='fx',
                                   fields=['bid'], freq='tick',
                                   data_source='dukascopy',
                                   tickers=['EURUSD'])

    market = Market(market_data_generator=MarketDataGenerator())

    try:
        return market.fetch_market(md_request)
    except Exception:
        # Best-effort: callers treat None as "no data available"
        return None
def _download(self, md_request, folder_prefix):
    """Fetch one ticker's tick data, annotate it and dump it to an HDF5
    file named ``<folder_prefix>_<ticker>.h5``."""
    from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

    logger = LoggerManager.getLogger(__name__)

    ticker = md_request.ticker[0]

    frame = Market(
        market_data_generator=MarketDataGenerator()).fetch_market(
        md_request=md_request)

    frame.columns = ['bid', 'ask', 'bidv', 'askv']
    frame['venue'] = 'dukascopy'
    frame['ticker'] = ticker
    frame['mid'] = (frame['bid'].values + frame['ask'].values) / 2.0

    out_path = folder_prefix + "_" + ticker + ".h5"
    self.dump_hdf5_file(frame, out_path)

    logger.info('Dumped to ' + out_path)
def load_une_data():
    """Download the state unemployment series from ALFRED and strip the
    '.close' suffix from the resulting column names."""
    md_request = MarketDataRequest(
        start_date='01 Jan 2001',       # Start date
        finish_date='12 Aug 2019',      # Finish date
        tickers=us_states,              # Desired ticker names post-download
        vendor_tickers=us_states_fred,  # The tickers used by the vendor
        fields=['close'],               # Fields to fetch
        data_source='alfred',           # Data source
        # vendor_fields=['actual-release', 'first-revision', 'close'],
        fred_api_key=FRED_API_KEY)      # Most sources need an API key

    frame = Market(
        market_data_generator=MarketDataGenerator()).fetch_market(md_request)

    frame.columns = [col.replace('.close', '') for col in frame.columns]

    return frame
def get_figure(ticker, data_source, start_date, api_key):
    """Fetch market data and return ``(matplotlib Figure, DataFrame)``."""
    import matplotlib.pyplot as plt

    # Build the market data request
    md_request = MarketDataRequest(start_date=start_date, tickers=ticker,
                                   vendor_tickers=ticker,
                                   data_source=data_source)

    # Quandl requires an API key on the request
    if data_source == 'quandl':
        md_request.quandl_api_key = api_key

    df = Market(
        market_data_generator=MarketDataGenerator()).fetch_market(md_request)

    # Plot using Matplotlib
    scale = 1.0
    fig = plt.figure(dpi=90, figsize=(scale * 3.0, scale * 2.0))
    df.plot(ax=plt.gca())

    return fig, df
def get_mid_price(raw_data_path, ticker='EURUSD'):
    """Return 1-minute mid prices for *ticker*, caching tick data on disk.

    :param raw_data_path: directory for the Parquet cache files
    :param ticker: FX cross to download (default 'EURUSD')
    :return: DataFrame of 1-minute 'mid' prices

    Fix: the yearly Parquet files were written with raw string
    concatenation (``raw_data_path + ticker + ...``), dropping the path
    separator, so when *raw_data_path* was a directory name they landed
    outside it and could never be read back by the ``os.path.join``
    loads; they are now written with ``os.path.join`` too.
    """
    # Define all the vendor fields, tickers etc. explicitly so we bypass
    # the configuration file
    md_request = MarketDataRequest(
        start_date='01 Jan 2007', finish_date='30 Jun 2019',
        fields=['bid', 'ask'], vendor_fields=['bid', 'ask'],
        freq='tick', data_source='dukascopy',
        tickers=[ticker], vendor_tickers=[ticker], category='fx')

    market = Market(market_data_generator=MarketDataGenerator())

    compression_type = 'gzip'  # you can change this to 'snappy' if you want!

    # Only download file if not on disk (slow to download), then write to
    # disk as Parquet (writing CSV takes a long time, so it's omitted)
    if not (os.path.exists(os.path.join(raw_data_path, ticker + '.gzip'))):
        df_tick = market.fetch_market(md_request)

        df_tick.to_parquet(os.path.join(raw_data_path, ticker + '.gzip'),
                           compression=compression_type,
                           engine='fastparquet')

        start_year = df_tick.index[0].year
        finish_year = df_tick.index[-1].year

        # Also dump one Parquet file per calendar year
        for year in range(start_year, finish_year + 1):
            df_year = df_tick[df_tick.index.year == year]

            df_year.to_parquet(
                os.path.join(raw_data_path,
                             ticker + '_' + str(year) + '.gzip'),
                compression=compression_type, engine='fastparquet')
    else:
        # Edit the below line if you want to pick only one of the yearly
        # Parquet files -- loading the whole amount might run out of memory!
        df_tick = pd.read_parquet(
            os.path.join(raw_data_path, ticker + '_2019.gzip'),
            engine='fastparquet')

    # Calculate mid-price
    df_tick['mid'] = (df_tick[ticker + '.ask']
                      + df_tick[ticker + '.bid']) / 2.0

    # Get 1 minute data
    return pd.DataFrame(df_tick['mid'].resample("1min").first()).dropna()
def load_data():
    """Download the historical spot data once (held in memory for later
    processing) and return ``(df, df_ret)``.

    :return: df -- forward-filled daily close levels;
        df_ret -- the gross ratio ``df / df.shift(1)`` (note: this is
        1 + return, not a net return, despite the name)

    Fix: ``fillna(method='ffill')`` is deprecated (removed in pandas 2.x);
    replaced with the equivalent ``ffill()``.
    """
    market = Market(market_data_generator=MarketDataGenerator())

    market_data_request = MarketDataRequest(
        start_date='01 Jan 2000',           # Start date
        freq='daily',                       # Daily data
        data_source='quandl',               # Use Quandl as data source
        tickers=tickers,                    # Ticker (Cuemacro)
        fields=['close'],                   # Which fields to download
        vendor_tickers=vendor_tickers,      # Ticker (Quandl)
        vendor_fields=['close'],            # Which vendor fields to download
        cache_algo='cache_algo_return')     # How to return data

    # You need to type your Quandl API key below (or modify the DataCred
    # file)
    # market_data_request.quandl_api_key = None

    df = market.fetch_market(market_data_request)
    df = df.ffill()

    df_ret = df / df.shift(1)

    return df, df_ret
def plot_animated_vol_market():
    """Download GBPUSD implied-vol data from Bloomberg and render an
    animated vol surface with matplotlib.

    NOTE(review): this file defines another function with this same name;
    whichever definition is executed last wins at import time.
    """
    market = Market(market_data_generator=MarketDataGenerator())

    cross = ['GBPUSD']
    start_date = '01 Jun 2016'
    finish_date = '01 Aug 2016'
    sampling = 'no'

    md_request = MarketDataRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   data_source='bloomberg', cut='LDN',
                                   category='fx-implied-vol',
                                   tickers=cross,
                                   cache_algo='internet_load_return')

    df = market.fetch_market(md_request)

    if sampling != 'no':
        df = df.resample(sampling).mean()

    fxvf = FXVolFactory()

    # One vol-surface DataFrame per date (could have used a panel)
    df_vs = [fxvf.extract_vol_surface_for_date(df, cross[0], i)
             for i in range(0, len(df.index))]

    style = Style(title="FX vol surface of " + cross[0], source="chartpy",
                  color='Blues', animate_figure=True,
                  animate_titles=df.index, animate_frame_ms=500,
                  normalize_colormap=False)

    # Chart object is initialised with the dataframe and our chart style
    Chart(df=df_vs, chart_type='surface', style=style).plot(
        engine='matplotlib')
def load_tickers():
    """Download S&P 500 constituents' OHLCV history from Yahoo, first
    with threading and then with multiprocessing (to compare the two
    download techniques), and dump the result to CSV."""
    logger = LoggerManager.getLogger(__name__)

    market = Market(market_data_generator=MarketDataGenerator())

    DataConstants.market_thread_technique = 'thread'

    # Load current S&P 500 ticker list via Wikipedia
    snp = pd.read_html(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

    tickers = snp[0]['Symbol'].to_list()

    # Download equities data from Yahoo
    md_request = MarketDataRequest(
        start_date=START_DATE,
        data_source='yahoo',  # use Yahoo as data source
        tickers=tickers,  # ticker (findatapy)
        fields=['close', 'open', 'high', 'low', 'volume'],  # fields to download
        vendor_tickers=tickers,  # ticker (Yahoo)
        vendor_fields=['Close', 'Open', 'High', 'Low', 'Volume'])  # Yahoo fields to download

    logger.info("Loading data with threading")

    df = market.fetch_market(md_request)

    logger.info("Loading data with multiprocessing")

    DataConstants.market_thread_technique = 'multiprocessing'

    df = market.fetch_market(md_request)

    logger.info("Loaded data with multiprocessing")

    df.to_csv("temp_downloads/snp.csv")
__author__ = 'saeedamen' # loading data import datetime from chartpy import Chart, Style from findatapy.market import Market, MarketDataGenerator, MarketDataRequest from findatapy.util.loggermanager import LoggerManager logger = LoggerManager().getLogger(__name__) chart = Chart(engine='matplotlib') market = Market(market_data_generator=MarketDataGenerator()) # choose run_example = 0 for everything # run_example = 1 - download BoE data from quandl run_example = 0 ###### fetch data from Quandl for BoE rate (using Bloomberg data) if run_example == 1 or run_example == 0: # Monthly average of UK resident monetary financial institutions' (excl. Central Bank) sterling # Weighted average interest rate, other loans, new advances, on a fixed rate to private non-financial corporations (in percent) # not seasonally adjusted md_request = MarketDataRequest( start_date="01 Jan 2000", # start date data_source='quandl', # use Quandl as data source tickers=['Weighted interest rate'], fields=['close'], # which fields to download
class HistEconDataFactory(object):
    """Loads historical economic data series (FRED / Bloomberg / Quandl)
    using the pretty-ticker/country mapping CSVs from DataConstants.

    Fixes: ``grasp_coded_entry`` used the long-removed ``DataFrame.ix``
    accessor; the empty-list test ``vendor_ticker == []`` replaced with
    the idiomatic ``not vendor_ticker``.
    """

    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(
            DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date,
                                  country_group, data_type,
                                  source = 'fred',
                                  cache_algo = "internet_load_return"):
        """Fetch the daily history of ``<country>-<data_type>`` series for
        every country in *country_group* (a group name from the mapping
        CSV, or an explicit list of country names)."""
        # vendor_country_codes = self.fred_country_codes[country_group]
        # vendor_pretty_country = self.fred_nice_country_codes[country_group]

        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # Get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group][
                'Country'])

        # Construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # Get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                self._all_econ_tickers["Full Code"] == pretty_ticker][
                source].values)

            if not vendor_ticker:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg':
            vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
            start_date = start_date,            # start date
            finish_date = finish_date,          # finish date
            category = 'economic',
            freq = 'daily',                     # daily data
            data_source = source,               # economic data source
            cut = 'LOC',
            tickers = pretty_tickers,
            fields = ['close'],                 # which fields to download
            vendor_tickers = vendor_tickers,
            vendor_fields = vendor_fields,      # which vendor fields to download
            cache_algo = cache_algo)            # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        """Stack the frame from *index* onwards into long
        (Date, Name, Val) rows and attach each country's numeric code."""
        # Fix: DataFrame.ix was removed from pandas; use positional iloc.
        # NOTE(review): if callers pass a label rather than a position,
        # this should be .loc instead -- confirm against call sites.
        df = df.iloc[index:].stack()
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']
        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[
                self._econ_country_codes["Country"] == x]['Code'])
             for x in countries], [])

        return df
def load_tickers():
    """Download a decade of S&P 500 closes from Yahoo twice -- once with
    threading, once with multiprocessing -- to compare the two download
    techniques."""
    logger = LoggerManager.getLogger(__name__)

    market = Market(market_data_generator=MarketDataGenerator())

    DataConstants.market_thread_technique = 'thread'

    # Get recent list of S&P 500 (some of these will fail, e.g. BRK.B,
    # because of incorrect tickers - findatapy should handle this error
    # gracefully)
    tickers = ["MMM", "ABT", "ABBV", "ACN", "ATVI", "AYI", "ADBE", "AAP",
               "AES", "AET", "AMG", "AFL", "A", "APD", "AKAM", "ALK",
               "ALB", "AGN", "LNT", "ALXN", "ALLE", "ADS", "ALL", "GOOGL",
               "GOOG", "MO", "AMZN", "AEE", "AAL", "AEP", "AXP", "AIG",
               "AMT", "AWK", "AMP", "ABC", "AME", "AMGN", "APH", "APC",
               "ADI", "ANTM", "AON", "APA", "AIV", "AAPL", "AMAT", "ADM",
               "ARNC", "AJG", "AIZ", "T", "ADSK", "ADP", "AN", "AZO",
               "AVB", "AVY", "BHI", "BLL", "BAC", "BK", "BCR", "BAX",
               "BBT", "BDX", "BBBY", "BRK.B", "BBY", "BIIB", "BLK", "HRB",
               "BA", "BWA", "BXP", "BSX", "BMY", "AVGO", "BF.B", "CHRW",
               "CA", "COG", "CPB", "COF", "CAH", "HSIC", "KMX", "CCL",
               "CAT", "CBG", "CBS", "CELG", "CNC", "CNP", "CTL", "CERN",
               "CF", "SCHW", "CHTR", "CHK", "CVX", "CMG", "CB", "CHD",
               "CI", "XEC", "CINF", "CTAS", "CSCO", "C", "CFG", "CTXS",
               "CLX", "CME", "CMS", "COH", "KO", "CTSH", "CL", "CMCSA",
               "CMA", "CAG", "CXO", "COP", "ED", "STZ", "GLW", "COST",
               "COTY", "CCI", "CSRA", "CSX", "CMI", "CVS", "DHI", "DHR",
               "DRI", "DVA", "DE", "DLPH", "DAL", "XRAY", "DVN", "DLR",
               "DFS", "DISCA", "DISCK", "DG", "DLTR", "D", "DOV", "DOW",
               "DPS", "DTE", "DD", "DUK", "DNB", "ETFC", "EMN", "ETN",
               "EBAY", "ECL", "EIX", "EW", "EA", "EMR", "ENDP", "ETR",
               "EVHC", "EOG", "EQT", "EFX", "EQIX", "EQR", "ESS", "EL",
               "ES", "EXC", "EXPE", "EXPD", "ESRX", "EXR", "XOM", "FFIV",
               "FB", "FAST", "FRT", "FDX", "FIS", "FITB", "FSLR", "FE",
               "FISV", "FLIR", "FLS", "FLR", "FMC", "FTI", "FL", "F",
               "FTV", "FBHS", "BEN", "FCX", "FTR", "GPS", "GRMN", "GD",
               "GE", "GGP", "GIS", "GM", "GPC", "GILD", "GPN", "GS", "GT",
               "GWW", "HAL", "HBI", "HOG", "HAR", "HRS", "HIG", "HAS",
               "HCA", "HCP", "HP", "HES", "HPE", "HOLX", "HD", "HON",
               "HRL", "HST", "HPQ", "HUM", "HBAN", "IDXX", "ITW", "ILMN",
               "IR", "INTC", "ICE", "IBM", "IP", "IPG", "IFF", "INTU",
               "ISRG", "IVZ", "IRM", "JEC", "JBHT", "SJM", "JNJ", "JCI",
               "JPM", "JNPR", "KSU", "K", "KEY", "KMB", "KIM", "KMI",
               "KLAC", "KSS", "KHC", "KR", "LB", "LLL", "LH", "LRCX",
               "LEG", "LEN", "LVLT", "LUK", "LLY", "LNC", "LLTC", "LKQ",
               "LMT", "L", "LOW", "LYB", "MTB", "MAC", "M", "MNK", "MRO",
               "MPC", "MAR", "MMC", "MLM", "MAS", "MA", "MAT", "MKC",
               "MCD", "MCK", "MJN", "MDT", "MRK", "MET", "MTD", "KORS",
               "MCHP", "MU", "MSFT", "MAA", "MHK", "TAP", "MDLZ", "MON",
               "MNST", "MCO", "MS", "MOS", "MSI", "MUR", "MYL", "NDAQ",
               "NOV", "NAVI", "NTAP", "NFLX", "NWL", "NFX", "NEM", "NWSA",
               "NWS", "NEE", "NLSN", "NKE", "NI", "NBL", "JWN", "NSC",
               "NTRS", "NOC", "NRG", "NUE", "NVDA", "ORLY", "OXY", "OMC",
               "OKE", "ORCL", "PCAR", "PH", "PDCO", "PAYX", "PYPL", "PNR",
               "PBCT", "PEP", "PKI", "PRGO", "PFE", "PCG", "PM", "PSX",
               "PNW", "PXD", "PBI", "PNC", "RL", "PPG", "PPL", "PX",
               "PCLN", "PFG", "PG", "PGR", "PLD", "PRU", "PEG", "PSA",
               "PHM", "PVH", "QRVO", "PWR", "QCOM", "DGX", "RRC", "RTN",
               "O", "RHT", "REGN", "RF", "RSG", "RAI", "RHI", "ROK",
               "COL", "ROP", "ROST", "RCL", "R", "CRM", "SCG", "SLB",
               "SNI", "STX", "SEE", "SRE", "SHW", "SIG", "SPG", "SWKS",
               "SLG", "SNA", "SO", "LUV", "SWN", "SE", "SPGI", "SWK",
               "SPLS", "SBUX", "STT", "SRCL", "SYK", "STI", "SYMC", "SYF",
               "SYY", "TROW", "TGT", "TEL", "TGNA", "TDC", "TSO", "TXN",
               "TXT", "COO", "HSY", "TRV", "TMO", "TIF", "TWX", "TJX",
               "TMK", "TSS", "TSCO", "TDG", "RIG", "TRIP", "FOXA", "FOX",
               "TSN", "UDR", "ULTA", "USB", "UA", "UAA", "UNP", "UAL",
               "UNH", "UPS", "URI", "UTX", "UHS", "UNM", "URBN", "VFC",
               "VLO", "VAR", "VTR", "VRSN", "VRSK", "VZ", "VRTX", "VIAB",
               "V", "VNO", "VMC", "WMT", "WBA", "DIS", "WM", "WAT", "WEC",
               "WFC", "HCN", "WDC", "WU", "WRK", "WY", "WHR", "WFM",
               "WMB", "WLTW", "WYN", "WYNN", "XEL", "XRX", "XLNX", "XL",
               "XYL", "YHOO", "YUM", "ZBH", "ZION", "ZTS",
               ]

    # Download equities data from Yahoo
    md_request = MarketDataRequest(
        start_date="decade",            # start date
        data_source='yahoo',            # use Yahoo as data source
        tickers=tickers,                # ticker (findatapy)
        fields=['close'],               # which fields to download
        vendor_tickers=tickers,         # ticker (Yahoo)
        vendor_fields=['Close'])        # which Yahoo fields to download

    logger.info("Loading data with threading")

    df = market.fetch_market(md_request)

    logger.info("Loading data with multiprocessing")

    DataConstants.market_thread_technique = 'multiprocessing'

    df = market.fetch_market(md_request)

    logger.info("Loaded data with multiprocessing")
class HistEconDataFactory(object):
    """Loads historical economic data series (FRED / Bloomberg / Quandl)
    using the pretty-ticker/country mapping CSVs from DataConstants.

    Fixes: ``grasp_coded_entry`` used the long-removed ``DataFrame.ix``
    accessor; the empty-list test ``vendor_ticker == []`` replaced with
    the idiomatic ``not vendor_ticker``.
    """

    def __init__(self, market_data_generator = None):
        self.logger = LoggerManager().getLogger(__name__)

        self._all_econ_tickers = pandas.read_csv(
            DataConstants().all_econ_tickers)
        self._econ_country_codes = pandas.read_csv(
            DataConstants().econ_country_codes)
        self._econ_country_groups = pandas.read_csv(
            DataConstants().econ_country_groups)

        if market_data_generator is None:
            self.market_data_generator = MarketDataGenerator()
        else:
            self.market_data_generator = market_data_generator

    def get_economic_data_history(self, start_date, finish_date,
                                  country_group, data_type,
                                  source = 'fred',
                                  cache_algo = "internet_load_return"):
        """Fetch the daily history of ``<country>-<data_type>`` series
        for every country in *country_group* (a group name from the
        mapping CSV, or an explicit list of country names)."""
        if isinstance(country_group, list):
            pretty_country_names = country_group
        else:
            # Get all the country names in the country_group
            pretty_country_names = list(self._econ_country_groups[
                self._econ_country_groups["Country Group"] == country_group][
                'Country'])

        # Construct the pretty tickers
        pretty_tickers = [x + '-' + data_type for x in pretty_country_names]

        # Get vendor tickers
        vendor_tickers = []

        for pretty_ticker in pretty_tickers:
            vendor_ticker = list(self._all_econ_tickers[
                self._all_econ_tickers["Full Code"] == pretty_ticker][
                source].values)

            if not vendor_ticker:
                vendor_ticker = None
                self.logger.error('Could not find match for ' + pretty_ticker)
            else:
                vendor_ticker = vendor_ticker[0]

            vendor_tickers.append(vendor_ticker)

        vendor_fields = ['close']

        if source == 'bloomberg':
            vendor_fields = ['PX_LAST']

        md_request = MarketDataRequest(
            start_date = start_date,        # start date
            finish_date = finish_date,      # finish date
            category = 'economic',
            freq = 'daily',                 # daily data
            data_source = source,           # economic data source
            cut = 'LOC',
            tickers = pretty_tickers,
            fields = ['close'],             # which fields to download
            vendor_tickers = vendor_tickers,
            vendor_fields = vendor_fields,  # which vendor fields to download
            cache_algo = cache_algo)        # how to return data

        return self.market_data_generator.fetch_market_data(md_request)

    def grasp_coded_entry(self, df, index):
        """Stack the frame from *index* onwards into long
        (Date, Name, Val) rows and attach each country's numeric code."""
        # Fix: DataFrame.ix was removed from pandas; use positional iloc.
        # NOTE(review): if callers pass a label rather than a position,
        # this should be .loc instead -- confirm against call sites.
        df = df.iloc[index:].stack()
        df = df.reset_index()
        df.columns = ['Date', 'Name', 'Val']

        countries = df['Name']
        countries = [x.split('-', 1)[0] for x in countries]

        df['Code'] = sum(
            [list(self._econ_country_codes[
                self._econ_country_codes["Country"] == x]['Code'])
             for x in countries], [])

        return df
def create_test_raw_data(self, ticker_list=None, start_date=None,
                         finish_date=None, folder_prefix=None):
    """Downloads FX tick data from DukasCopy and then dumps each ticker
    in a separate HDF5 file if a folder is specified. If no folder is
    specified returns a list of DataFrames (note: can be a very large
    list in memory)

    Parameters
    ----------
    ticker_list : str (list)
        List of FX tickers to download

    start_date : datetime/str
        Start date of FX tick data download

    finish_date : datetime/str
        Finish date of FX tick data download

    folder_prefix : str
        Folder to dump everything

    Returns
    -------
    DataFrame (list)
    """
    from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

    # When no window is given, pick a random ~90-day window ending at
    # least 30 days in the past
    if start_date is None and finish_date is None:
        finish_date = datetime.datetime.utcnow().date() - timedelta(
            days=30)
        start_date = finish_date - timedelta(days=30 * 15)

        start_date = self._compute_random_date(start_date, finish_date)
        finish_date = start_date + timedelta(days=90)

    df_list = []
    result = []

    # from multiprocessing.dummy import Pool  # threading
    from multiprocess.pool import Pool  # actual new processes
    import time

    # NOTE(review): this condition looks inverted -- the parallel branch
    # runs when folder_prefix is None, yet _download writes files named
    # folder_prefix + "_" + ticker; confirm the intended branch order.
    # If we don't specify a folder
    if folder_prefix is None:
        mini_ticker_list = self._split_list(ticker_list, 2)

        # Use multiprocess to speed up the download
        for mini in mini_ticker_list:
            pool = Pool(processes=2)

            for ticker in mini:
                # Stagger the submissions slightly
                time.sleep(1)
                self.logger.info("Loading " + ticker)

                md_request = MarketDataRequest(
                    start_date=start_date, finish_date=finish_date,
                    category='fx', tickers=ticker,
                    fields=['bid', 'ask', 'bidv', 'askv'],
                    data_source='dukascopy', freq='tick')

                # self._download(md_request)
                result.append(
                    pool.apply_async(self._download, args=(
                        md_request, folder_prefix,)))

            pool.close()
            pool.join()
    else:
        # Serial download: fetch each ticker in turn
        market = Market(market_data_generator=MarketDataGenerator())

        for ticker in ticker_list:
            md_request = MarketDataRequest(
                start_date=start_date, finish_date=finish_date,
                category='fx', tickers=ticker,
                fields=['bid', 'ask', 'bidv', 'askv'],
                data_source='dukascopy', freq='tick')

            df = market.fetch_market(md_request=md_request)
            df.columns = ['bid', 'ask', 'bidv', 'askv']
            df['venue'] = 'dukascopy'
            df['ticker'] = ticker

            # print(df)

            if folder_prefix is not None:
                self.dump_hdf5_file(df, folder_prefix + "_" + ticker + ".h5")
                # df.to_csv(folder_prefix + "_" + ticker + ".csv")
                # CSV files can be very large, so try to avoid
            else:
                df_list.append(df)

    return df_list