def download_dataset():
    """Download events for station 501 and shift t3/t4 by the time delta"""
    delta_data = genfromtxt('time_delta_fixed.tsv', delimiter='\t',
                            dtype=None, names=['ext_timestamp', 'time_delta'])
    start = gps_to_datetime(delta_data['ext_timestamp'][0] / int(1e9))
    end = gps_to_datetime(delta_data['ext_timestamp'][-1] / int(1e9))
    with tables.open_file('data.h5', 'w') as data:
        download_data(data, '/s501', 501, start, end)
        download_data(data, '/s501_original', 501, start, end)
    with tables.open_file('data.h5', 'a') as data:
        events = data.root.s501.events
        idx = delta_data['ext_timestamp'].tolist().index(
            events[-1]['ext_timestamp']) + 1
        time_delta = delta_data['time_delta'][:idx]
        t3 = data.root.s501.events.col('t3')[1:]
        t4 = data.root.s501.events.col('t4')[1:]
        # Only shift valid (non-negative) arrival times
        events.modify_column(start=1, colname='t3',
                             column=where(t3 >= 0, t3 + time_delta, t3))
        events.modify_column(start=1, colname='t4',
                             column=where(t4 >= 0, t4 + time_delta, t4))
        events.flush()
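# A minimal, hypothetical sketch (not part of the original script) of the
# conditional shift used above: numpy.where only moves non-negative arrival
# times by the time delta, so negative placeholder values are left untouched.
from numpy import array, where

t = array([12.5, -1., 27.5, -999.])
time_delta = array([2.5, 2.5, 2.5, 2.5])
print(where(t >= 0, t + time_delta, t))  # [  15.   -1.   30. -999.]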
def download_events_data(data):
    """Download event data for each station into a separate table"""
    for station in pbar(STATIONS):
        group = '/s%d' % station
        if group not in data:
            download_data(data, group, station, start=START, end=END,
                          progress=False)
def download_dataset():
    with tables.open_file(STATION_PATH, 'w') as data:
        for station_number in STATIONS:
            download_data(data, '/s%d' % station_number, station_number,
                          start=START, end=END, type='events', progress=True)
def download_data_for_station(station_number):
    path = STATION_PATH % station_number
    if os.path.exists(path):
        return path
    with tables.open_file(path, 'a') as data:
        for startdt, enddt in monthrange(START, END):
            print 'downloading', startdt.date(), enddt.date(), station_number
            download_data(data, '/s%d' % station_number, station_number,
                          start=startdt, end=enddt, type='events',
                          progress=False)
    return path
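# The monthrange helper used above is not defined in this snippet. A minimal
# sketch of what it is assumed to do: yield consecutive (start, end) datetime
# pairs, one per calendar month, covering the interval from start to end.
from datetime import datetime


def monthrange(start, end):
    """Yield (first of month, first of next month) pairs from start to end"""
    current = datetime(start.year, start.month, 1)
    while current < end:
        if current.month == 12:
            next_month = datetime(current.year + 1, 1, 1)
        else:
            next_month = datetime(current.year, current.month + 1, 1)
        yield current, min(next_month, end)
        current = next_month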
def get_weather_data_dataset():
    if os.path.exists(DATA_PATH):
        print 'Datafile already exists, skipping download'
        return
    with tables.open_file(DATA_PATH, 'a') as data:
        for station in [501]:  # , 502, 503, 504, 505, 506, 508, 509, 510]:
            for data_type in ['weather', 'events']:
                download_data(data, '/s%d' % station, station,
                              datetime(2015, 10, 1), datetime(2015, 10, 15),
                              type=data_type)
def download_dataset():
    """Download events per station and shift t3/t4 by the time deltas"""
    print 'Downloading data . . .'
    with tables.open_file(DATA, 'w'):
        # Clear previous data
        pass
    for station in STATIONS:
        delta_data = genfromtxt('data/time_delta_%d.tsv' % station,
                                delimiter='\t', dtype=None,
                                names=['ext_timestamp', 'time_delta'])
        start = gps_to_datetime(delta_data['ext_timestamp'][0] / int(1e9))
        end = gps_to_datetime(delta_data['ext_timestamp'][-1] / int(1e9))
        with tables.open_file(DATA, 'a') as data:
            download_data(data, '/s%d' % station, station, start, end)
            download_data(data, '/s%d_original' % station, station, start,
                          end)
        with tables.open_file(DATA, 'a') as data:
            events = data.get_node('/s%d' % station, 'events')
            # Data ends before delta list because I got delta data from today
            delta_ets_list = delta_data['ext_timestamp'].tolist()
            stop_idx = delta_ets_list.index(events[-1]['ext_timestamp']) + 1
            time_delta = delta_data['time_delta'][:stop_idx]
            # Trim events that fall outside the range of the delta list
            event_ets_list = events.col('ext_timestamp').tolist()
            idx = event_ets_list.index(delta_ets_list[0])
            events.remove_rows(0, idx)
            try:
                last_idx = event_ets_list[idx:].index(delta_ets_list[-1]) - idx
            except ValueError:
                pass
            else:
                events.remove_rows(last_idx)
            events.flush()
            assert all(events.col('ext_timestamp') ==
                       delta_data['ext_timestamp'])
            # Only shift valid (non-negative) arrival times
            t3 = events.col('t3')
            t4 = events.col('t4')
            events.modify_column(colname='t3',
                                 column=where(t3 >= 0, t3 + time_delta, t3))
            events.modify_column(colname='t4',
                                 column=where(t4 >= 0, t4 + time_delta, t4))
            events.flush()
def download_501_510_dataset():
    """Download a dataset for analysis"""
    print "Downloading 501-510 dataset."
    stations = [501, 510]
    start = datetime(2014, 10, 1)
    end = datetime(2014, 10, 10)
    with tables.open_file(
            '/Users/arne/Datastore/501_510/c_501_510_141001_141011.h5',
            'a') as data:
        download_coincidences(data, stations=stations, start=start, end=end,
                              n=2)
    with tables.open_file(
            '/Users/arne/Datastore/501_510/e_501_510_141001_141011.h5',
            'a') as data:
        download_data(data, '/s501', 501, start=start, end=end)
        download_data(data, '/s510', 510, start=start, end=end)

    start = datetime(2014, 11, 1)
    end = datetime(2014, 11, 10)
    with tables.open_file(
            '/Users/arne/Datastore/501_510/c_501_510_141101_141111.h5',
            'a') as data:
        download_coincidences(data, stations=stations, start=start, end=end,
                              n=2)
    with tables.open_file(
            '/Users/arne/Datastore/501_510/e_501_510_141101_141111.h5',
            'a') as data:
        download_data(data, '/s501', 501, start=start, end=end)
        download_data(data, '/s510', 510, start=start, end=end)
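# Hypothetical read-back sketch (not part of the original script), assuming
# download_coincidences stores its results in a /coincidences group:
import tables

with tables.open_file(
        '/Users/arne/Datastore/501_510/c_501_510_141001_141011.h5',
        'r') as data:
    coincidences = data.root.coincidences.coincidences
    print len(coincidences), 'coincidences between stations 501 and 510'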
def get_data(data):
    download_data(data, '/s1001', 1001, datetime(2015, 3, 1),
                  datetime(2015, 3, 20))
import datetime

import tables

from sapphire import download_data, CoincidencesESD

STATIONS = [501, 502, 40001]
START = datetime.datetime(2019, 7, 24)
END = datetime.datetime(2019, 7, 25)

if __name__ == '__main__':
    station_groups = ['/s%d' % u for u in STATIONS]
    data = tables.open_file('data.h5', 'a')
    for station, group in zip(STATIONS, station_groups):
        download_data(data, group, station, START, END)


# In[32]:

# We have downloaded data for three stations. Note that we used sapphire.esd
# for the download, so we have no traces and the download is quick.
# To see what the datafile contains, we do the following:
print(data)


# In[33]:
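# As a hedged example of inspecting the result (the group name assumes the
# download above succeeded for station 501), the events table is an ordinary
# PyTables node:
events = data.root.s501.events
print(len(events), 'events for station 501')
print(events.colnames)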
def download_dataset():
    with tables.open_file(DATA_PATH, 'w') as data:
        download_data(data, STATION_GROUP, STATION,
                      datetime.datetime(2015, 1, 1),
                      datetime.datetime(2015, 10, 1))
def get_data():
    if os.path.exists(DATA):
        print 'data already exists'
        return
    with tables.open_file(DATA, 'w') as data:
        download_data(data, '/s99', 99, datetime(2015, 12, 4))