Beispiel #1
0
def init_dl_top200(top200):
    """Download data for every symbol listed in ``top200``.

    Args:
        top200: Object exposing ``symbol.tolist()`` (presumably a pandas
            DataFrame with a ``symbol`` column -- confirm against callers).

    Returns:
        The frame returned by ``getdata.get_many_syms`` with its column
        labels and the first level of its (multi-level) index cast to ``str``.
    """
    syms = top200.symbol.tolist()
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    data = getdata.get_many_syms(syms, tablelist, getdata.bqc, getdata.job_config)
    data.columns = data.columns.astype(str)
    # ``set_levels(..., inplace=True)`` was deprecated in pandas 1.x and
    # removed in 2.0; building a new index and assigning it back works on
    # every pandas version.
    data.index = data.index.set_levels(data.index.levels[0].astype(str), level=0)
    return data
Beispiel #2
0
def get_sym(sym):
    """Download a single symbol and write it to the HDF store under key ``sym``.

    NOTE(review): ``datasetname`` is not a parameter of this function; it is
    read from the enclosing scope (presumably a module-level global --
    confirm it is defined before this is called).

    Args:
        sym: Symbol to fetch; also used as the key passed to ``store.put``.

    Returns:
        None.
    """
    store = select_HDFstore(datasetname)
    try:
        tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
        newdf = getdata.get_sym(sym, tablelist, getdata.bqc, getdata.job_config)
        store.put(sym, newdf, format='table')
    finally:
        # Always release the store handle, even when the download or the
        # write fails; otherwise the HDF file stays open.
        store.close()
Beispiel #3
0
def update_top200(datasetname):
    """Bring the local HDF store for ``datasetname`` up to date.

    Symbols returned by ``identify_top400()`` that are not yet listed in the
    store's ``'tablelistH5'`` table are downloaded in full and registered
    there with the current timestamp. Symbols that are already listed are
    updated incrementally from their recorded last-update value, and the
    new rows are appended to their existing tables. Finally
    ``ud_dset_tlist(datasetname)`` is called.

    Args:
        datasetname: Identifier passed to ``select_HDFstore`` and
            ``ud_dset_tlist``.

    Returns:
        None.
    """
    store = select_HDFstore(datasetname)
    try:
        tlisth5 = store.get('tablelistH5')
        # NOTE(review): the helper is called identify_top400 while everything
        # here is named "top200" -- confirm which universe is intended.
        top200 = identify_top400()
        top200syms = top200.symbol.tolist()
        tlisth5syms = tlisth5.index.tolist()
        newsyms = list(set(top200syms).difference(tlisth5syms))
        existingsyms = list(set(top200syms).intersection(tlisth5syms))
        tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)

        if newsyms:
            for sym in newsyms:
                # Best effort: a symbol that does not match a BigQuery table
                # raises inside getdata.get_sym; report it and move on so one
                # bad symbol does not abort the whole update.
                try:
                    newdf = getdata.get_sym(sym, tablelist, getdata.bqc, getdata.job_config)
                    store.put(sym, newdf, format='table')
                    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                    newrow = pd.DataFrame({'last_updated': [stamp]}, index=[sym])
                    # DataFrame.append was removed in pandas 2.0; concat is
                    # the equivalent that works on all versions.
                    tlisth5 = pd.concat([tlisth5, newrow])
                except Exception as e:
                    print(e)
            try:
                store.put('tablelistH5', tlisth5, format='table')
            except Exception as e:
                print(e)

        # Last-update value (first column of the table list) for each symbol
        # about to be refreshed. ``.iloc[0]`` is explicit positional access;
        # plain ``[0]`` on a label-indexed Series is deprecated.
        lastupdated = [tlisth5.loc[sym].iloc[0] for sym in existingsyms]

        # Send the requests to update all the tables, then append the new
        # rows to each symbol's table.
        multidf = getdata.upd_many_syms(existingsyms, tablelist, lastupdated, getdata.bqc, getdata.job_config)
        for sym in multidf.index.levels[0]:
            store.append(sym, multidf.loc[sym], format='table')
    finally:
        # Release the store handle even if a download or write fails.
        store.close()
    # Only reached on success, as in the original control flow.
    ud_dset_tlist(datasetname)
Beispiel #4
0
def init_dl_top400_2(top400, numthreads=20):
    """Download every symbol in ``top400`` with worker threads and store the results.

    Each symbol is put on an input queue. ``numthreads`` threads run
    ``getdata.get_sym_loop`` against that queue and an output queue. After
    all threads have been joined, every ``(df, sym)`` pair found on the
    output queue is written to the HDF store under key ``sym``.

    NOTE(review): ``datasetname`` is not a parameter of this function; it is
    read from the enclosing scope (presumably a module-level global --
    confirm it is defined before this is called).

    Args:
        top400: Object exposing ``symbol.tolist()`` (presumably a pandas
            DataFrame with a ``symbol`` column -- confirm against callers).
        numthreads: Number of worker threads to start. Defaults to 20, the
            value that was previously hard-coded.

    Returns:
        None.
    """
    syms = top400.symbol.tolist()
    tablelist = getdata.get_table_list(getdata.bqc, getdata.job_config)
    store = select_HDFstore(datasetname)
    try:
        symq = Queue.Queue()
        qout = Queue.Queue()
        for sym in syms:
            symq.put(sym)

        worker_args = (symq, qout, tablelist, getdata.bqc, getdata.job_config)
        threads = [
            threading.Thread(target=getdata.get_sym_loop, args=worker_args)
            for _ in range(numthreads)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # All workers have been joined, so nothing is writing to qout any
        # more and it can be drained until empty.
        while not qout.empty():
            df, sym = qout.get()
            store.put(sym, df, format='table', append=True)
    finally:
        # Release the store handle even if a thread setup step or a write fails.
        store.close()