def load_data(exp_params):
    """Load the experiment data and return it serialized as JSON.

    The data source is chosen as follows:

    * if the module-level ``CSV_LOCATION`` is truthy, the CSV at that
      location is read with ``pd.read_csv``;
    * otherwise, if no other call is currently flagged as loading
      (``LOADING_DB_DATA`` is falsy), the data is fetched through
      ``DataExtractor(exp_params).get_data()`` while the flag is raised;
    * otherwise this call polls once per second until the flag is cleared
      and then asks ``DataExtractor`` for the data itself.

    NOTE(review): any caching presumably happens inside ``DataExtractor``;
    nothing in this function caches the result -- confirm.

    Args:
        exp_params: passed unchanged to ``DataExtractor``; unused when the
            CSV source is active.

    Returns:
        The frame as a JSON string (``orient='split'``, ISO dates), with
        ``theday`` converted to strings, a ``thedayunix`` column holding the
        Unix time in seconds, and rows containing missing values dropped.

    Raises:
        Whatever ``pd.read_csv`` or ``DataExtractor.get_data`` raise; the
        loading flag is cleared before the exception propagates.
    """
    # Must precede every use of the name in this function: a ``global``
    # statement placed after a read of the same name is a SyntaxWarning on
    # Python 2 and a SyntaxError on Python 3.
    global LOADING_DB_DATA

    start = time.time()

    if CSV_LOCATION:  # uses a global variable, ugly but prettier than passing it every time
        print("reading data from local csv")
        df = pd.read_csv(CSV_LOCATION)
    elif not LOADING_DB_DATA:
        print("fetching data from db or cache")
        LOADING_DB_DATA = True
        try:
            df = DataExtractor(exp_params).get_data()
        finally:
            # Always clear the flag; otherwise a failed query would leave
            # every later caller stuck in the wait loop below.
            LOADING_DB_DATA = False
    else:
        print("waiting for query to end")
        while LOADING_DB_DATA:
            time.sleep(1)
        df = DataExtractor(exp_params).get_data()

    # get date to be a string and also saves the unix time for every date
    df['theday'] = df['theday'].astype('str')
    # Pin the resolution to nanoseconds before the integer cast so that the
    # division by 10**9 yields seconds whatever resolution the parser picked.
    parsed_days = pd.to_datetime(df['theday']).astype('datetime64[ns]')
    df['thedayunix'] = parsed_days.astype(np.int64) // 10**9

    # drop any empty values
    df = df.dropna()

    end = time.time()
    print("loading data took {} seconds".format(end - start))
    return df.to_json(date_format='iso', orient='split')