def init_dl_top200(top200):
    syms = top200.symbol.tolist()
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    data = getdata.get_many_syms(syms, tablelist, getdata.bqc, getdata.job_config)
    # Normalize column names and the symbol index level to plain strings
    data.columns = data.columns.astype(str)
    data.index = data.index.set_levels(data.index.levels[0].astype(str), level=0)
    return data

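# A minimal usage sketch (hypothetical, not part of the original flow):
# identify_top400() and select_HDFstore() are assumed to behave as they do
# elsewhere in this file, with datasetname the module-level dataset name.
def demo_init_dl():
    top200 = identify_top400()
    data = init_dl_top200(top200)
    store = select_HDFstore(datasetname)
    # data is indexed by (symbol, date); persist one HDF5 table per symbol
    for sym in data.index.levels[0]:
        store.put(sym, data.loc[sym], format='table')
    store.close()
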
def get_sym(sym):
    store = select_HDFstore(datasetname)
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    newdf = getdata.get_sym(sym, tablelist, getdata.bqc, getdata.job_config)
    store.put(sym, newdf, format='table')
    store.close()
    return

def update_top200(datasetname):
    store = select_HDFstore(datasetname)
    tlisth5 = store.get('tablelistH5')
    top200 = identify_top400()
    top200syms = top200.symbol.tolist()
    tlisth5syms = tlisth5.index.tolist()
    # Split the universe into symbols never stored before and symbols that
    # just need to be brought up to date
    newsyms = list(set(top200syms).difference(tlisth5syms))
    existingsyms = list(set(top200syms).intersection(tlisth5syms))
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    lastupdated = []
    if newsyms:
        for sym in newsyms:
            # Catches symbols that don't match any BigQuery table
            try:
                newdf = getdata.get_sym(sym, tablelist, getdata.bqc, getdata.job_config)
                store.put(sym, newdf, format='table')
                newrow = pd.DataFrame(
                    {'last_updated': [datetime.datetime.now().strftime("%Y-%m-%d %H:%M")]},
                    index=[sym])
                tlisth5 = tlisth5.append(newrow)
            except Exception as e:
                print e
        try:
            store.put('tablelistH5', tlisth5, format='table')
        except Exception as e:
            print e
    # Get the date at which the local copy was last updated for every
    # symbol about to be refreshed
    for sym in existingsyms:
        lastupdated.append(tlisth5.loc[sym][0])
    # Request the incremental updates for all existing tables at once
    multidf = getdata.upd_many_syms(existingsyms, tablelist, lastupdated,
                                    getdata.bqc, getdata.job_config)
    for sym in multidf.index.levels[0]:
        df = multidf.loc[sym]
        store.append(sym, df, format='table')
    store.close()
    ud_dset_tlist(datasetname)
    return

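# For reference, a sketch of the 'tablelistH5' bookkeeping frame that
# update_top200 relies on: one row per symbol, indexed by symbol, with a
# single last_updated column. Symbols and timestamps are illustrative only.
def demo_tablelist_schema():
    tlisth5 = pd.DataFrame({'last_updated': ['2018-03-01 09:30',
                                             '2018-03-01 09:31']},
                           index=['AAPL', 'MSFT'])
    # update_top200 reads tlisth5.loc[sym][0] to find each symbol's last
    # refresh time before requesting only newer rows from BigQuery
    return tlisth5
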
def init_dl_top400_2(top400):
    syms = top400.symbol.tolist()
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    store = select_HDFstore(datasetname)
    symq = Queue.Queue()
    qout = Queue.Queue()
    numthreads = 20
    threads = []
    # Fill the work queue with one entry per symbol
    for sym in syms:
        symq.put(sym)
    # Spin up the worker threads; each drains symbols from symq and pushes
    # the downloaded frames onto qout
    for i in range(numthreads):
        t = threading.Thread(target=getdata.get_sym_loop,
                             args=(symq, qout, tablelist, getdata.bqc, getdata.job_config))
        threads.append(t)
    for i in threads:
        i.start()
    for i in threads:
        i.join()
    # All workers are done; write each downloaded frame to the store
    for i in range(qout.qsize()):
        [df, sym] = qout.get()
        store.put(sym, df, format='table', append=True)
    store.close()
    return

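# getdata.get_sym_loop is defined in the getdata module and not shown here.
# A minimal sketch of the worker contract the threading code above assumes
# (drain the input queue until empty, pushing [df, sym] pairs onto qout):
def _get_sym_loop_sketch(symq, qout, tablelist, bqc, job_config):
    # Hypothetical reconstruction; the real implementation is getdata's
    while True:
        try:
            sym = symq.get_nowait()
        except Queue.Empty:
            return
        df = getdata.get_sym(sym, tablelist, bqc, job_config)
        qout.put([df, sym])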