def output(self, shared_lock=None, shared_list=None):
    '''
    Generate data wrapper

    @return Wyoming sounding data in a data wrapper
    '''
    full_results_dict = OrderedDict()
    full_meta_dict = OrderedDict()

    for query_url in generateQueries(self.station_number, self.year_list,
                                     self.month_list, self.day_start,
                                     self.day_end, self.start_hour,
                                     self.end_hour):
        with urlopen(query_url) as in_data:
            sp = SoundingParser()
            sp.feed(in_data.read().decode())

        for key, data in sp.data_dict.items():
            full_results_dict[key] = data

        for key, data in sp.metadata_dict.items():
            full_meta_dict[key] = data

    return TableWrapper(obj_wrap=full_results_dict, meta_data=full_meta_dict)
def output(self):
    '''
    Generate data wrapper for USGS geomagnetic data

    @return Geomagnetic data wrapper
    '''
    observatory_list = self.ap_paramList[0]()

    # USGS Edge server
    base_url = 'cwbpub.cr.usgs.gov'
    factory = EdgeFactory(host=base_url, port=2060)

    data_dict = OrderedDict()
    for observatory in observatory_list:
        ret_data = factory.get_timeseries(observatory=observatory,
                                          interval=self.interval,
                                          type=self.data_type,
                                          channels=self.channels,
                                          starttime=UTCDateTime(self.start_time),
                                          endtime=UTCDateTime(self.end_time))

        obs_data = OrderedDict()
        for label, trace in zip(self.channels, ret_data):
            time = pd.to_datetime(trace.stats['starttime'].datetime) \
                 + pd.to_timedelta(trace.times(), unit='s')
            obs_data[label] = pd.Series(trace.data, time)

        data_dict[observatory] = pd.DataFrame(obs_data)

    return TableWrapper(data_dict, default_columns=self.channels)
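# Hedged sketch of the trace-to-Series conversion used in output() above,
# with plain numpy/pandas stand-ins for the ObsPy trace attributes (the
# start time, sample offsets, and values below are invented).
import numpy as np
import pandas as pd

start = pd.Timestamp('2020-01-01 00:00:00')         # stands in for trace.stats['starttime']
offsets_s = np.arange(5) * 60.0                     # stands in for trace.times(): seconds from start
samples = np.array([30.1, 30.4, 30.2, 29.9, 30.0])  # stands in for trace.data

# Each sample is stamped with the trace start time plus its offset
index = start + pd.to_timedelta(offsets_s, unit='s')
series = pd.Series(samples, index)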
def output(self):
    '''
    Fetch Groundwater Data Wrapper

    @return Groundwater Data Wrapper
    '''
    meta_data = DataFetcher.getStationMetadata()
    data_file = DataFetcher.getDataLocation('groundwater')
    if data_file is None:
        print("No data available")
        return None

    if len(self.ap_paramList) == 1:
        station_list = self.ap_paramList[0]()
    elif len(self.ap_paramList) == 4:
        llat = self.ap_paramList[0]()
        ulat = self.ap_paramList[1]()
        llon = self.ap_paramList[2]()
        rlon = self.ap_paramList[3]()

        station_index = np.logical_and.reduce([meta_data.Lat > llat,
                                               meta_data.Lat < ulat,
                                               meta_data.Lon > llon,
                                               meta_data.Lon < rlon])

        cut_metadata = meta_data[station_index]
        station_list = cut_metadata[cut_metadata['Data Available'] == 1].index.tolist()
    else:
        station_list = None

    data_dict = OrderedDict()
    store = pd.HDFStore(data_file, 'r')

    if station_list is None:
        stations = [str(site) for site in
                    meta_data[meta_data['Data Available'] == 1].index]
    else:
        stations = station_list

    for station in stations:
        if self.start_date is not None and self.end_date is not None:
            data = store['USGS' + str(station)].reindex(
                pd.date_range(self.start_date, self.end_date))
        else:
            data = store['USGS' + str(station)]

        # Keep only stations whose fraction of valid (non-NaN) rows
        # meets the cutoff
        if len(data.dropna()) / len(data) >= self.cutoff:
            data_dict[int(station)] = data

    store.close()
    return TableWrapper(data_dict, meta_data=meta_data,
                        default_columns=['Median Depth to Water'])
def output(self):
    '''
    Generate data

    @return Table data wrapper of generated data
    '''
    if self.seed is not None:
        np.random.seed(self.seed)

    new_data = dict()
    name_list = []

    for arg in self.args:
        # Uniform draws on [start, end), optionally transformed by 'func'
        new_data[arg['name']] = np.random.rand(self.length) \
            * (arg['end'] - arg['start']) + arg['start']
        name_list.append(arg['name'])

        if 'func' in arg:
            new_data[arg['name']] = arg['func'](new_data[arg['name']])

    default_columns = name_list

    if self.final_function is not None:
        new_data = pd.DataFrame.from_dict(new_data)
        new_data, updated_column_names = self.final_function(new_data)

        if updated_column_names is not None:
            default_columns = updated_column_names

    data = {'generated_data': new_data}
    return TableWrapper(data, default_columns=default_columns)
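# Hedged usage sketch for the generator above: each entry of self.args is a
# dict with 'name', 'start', and 'end' keys and an optional 'func' transform.
# The length, bounds, and transform below are illustrative, not defaults.
import numpy as np

length = 5
arg = {'name': 'x', 'start': -1.0, 'end': 1.0, 'func': np.abs}

column = np.random.rand(length) * (arg['end'] - arg['start']) + arg['start']  # uniform on [start, end)
if 'func' in arg:
    column = arg['func'](column)  # optional element-wise transform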
def output(self):
    """
    Retrieve TESS data

    @return TableWrapper containing TESS lightcurves
    """
    tid_series = pd.Series([int(tid) for tid in self.ap_paramList[0]()])
    tid_string_list = [str(tid).zfill(16) for tid in tid_series]

    tid_found = tid_series.isin(self.toi_information['tic_id'])
    if np.count_nonzero(~tid_found) > 0:
        raise RuntimeError("No data for TID: "
                           + str(tid_series[~tid_found].tolist()))

    url_list = self.generateURLFromTID(tid_string_list)
    file_list = self.cacheData('tess', url_list)

    data_dict = OrderedDict()
    metadata_dict = OrderedDict()
    for filename, tid in zip(file_list, tid_string_list):
        with fits.open(filename) as fits_data:
            data_dict[tid], metadata_dict[tid] = parseTessData(fits_data)

    return TableWrapper(data_dict, meta_data=metadata_dict)
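# Hedged sketch of the id normalisation above: TIC ids are zero-padded to
# the 16-character width used in the TESS product file names (the id below
# is invented).
tid = 261136679
padded = str(tid).zfill(16)  # '0000000261136679'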
def output(self):
    '''
    Retrieve table wrapper

    @return TableWrapper
    '''
    return TableWrapper(self._table_data.copy())
def output(self):
    '''
    Generate data wrapper

    @return Wyoming sounding data in a data wrapper
    '''
    url_list = generateQueries(self.station_number, self.year_list,
                               self.month_list, 1, 31, 0, 12)
    file_list = self.cacheData('wyoming_sounding', url_list)

    full_data_dict = OrderedDict()
    full_meta_dict = OrderedDict()

    for filename in file_list:
        with open(filename, 'r') as sounding_data:
            sp = SoundingParser()
            sp.feed(sounding_data.read())

        for label, data in sp.data_dict.items():
            data_date = pd.to_datetime(
                sp.metadata_dict[label]['metadata']['Observation time'],
                format='%y%m%d/%H%M')

            data_hour = int(data_date.strftime('%H'))
            data_day = int(data_date.strftime('%d'))

            if int(self.day_start) <= data_day <= int(self.day_end) and \
               int(self.start_hour) <= data_hour <= int(self.end_hour):
                full_data_dict[label] = data
                full_meta_dict[label] = sp.metadata_dict[label]

    return TableWrapper(obj_wrap=full_data_dict, meta_data=full_meta_dict)
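# Hedged sketch of the observation-time parse above: Wyoming sounding
# metadata stores times as yymmdd/HHMM (the value below is invented).
import pandas as pd

data_date = pd.to_datetime('190225/1200', format='%y%m%d/%H%M')  # 2019-02-25 12:00:00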
def output(self):
    '''
    Construct NGL GPS data wrapper

    @return NGL GPS data wrapper
    '''
    metadata = DataFetcher.getStationMetadata()
    store_location = data_util.getDataLocation(self.data_type)
    store = pd.HDFStore(store_location, 'r')

    index = np.logical_and.reduce([self.lat_range[0] < metadata['Lat'],
                                   self.lat_range[1] > metadata['Lat'],
                                   self.lon_range[0] < metadata['Lon'],
                                   self.lon_range[1] > metadata['Lon']])
    region_stations = metadata[index]

    data_dict = {}

    if self.data_type == 'ngl_gps':
        valid_station_list = list(region_stations.index)
        default_columns = ('dN', 'dE', 'dU')
        default_error_columns = ('Sn', 'Se', 'Su')
        freq = '1D'
    elif self.data_type == 'ngl_5min':
        # Station names in the store are prefixed with '/data_'
        store_station_set = set(station[6:] for station in store.keys())
        selected_station_set = set(region_stations.index)

        valid_station_list = list(
            store_station_set.intersection(selected_station_set))
        valid_station_list.sort()

        default_columns = ('___n-ref(m)', '___e-ref(m)', '___v-ref(m)')
        default_error_columns = ('sig_n(m)', 'sig_e(m)', 'sig_v(m)')
        freq = '5min'

    # Minimum number of samples a station must have: the expected number
    # of samples between the start and end dates at frequency freq,
    # scaled by the minimum-data ratio
    min_data_len = ((pd.to_datetime(self.end_date)
                     - pd.to_datetime(self.start_date))
                    / pd.to_timedelta(freq)) * self.mdyratio

    for station in valid_station_list:
        data = store['data_' + station].loc[self.start_date:self.end_date]
        if len(data) >= min_data_len:
            data = data.reindex(pd.date_range(self.start_date,
                                              self.end_date, freq=freq))
            data_dict[station] = data

    metadata = metadata.loc[data_dict.keys()]

    return TableWrapper(data_dict, meta_data=metadata,
                        default_columns=default_columns,
                        default_error_columns=default_error_columns)
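# Hedged sketch of the completeness threshold above (the dates and ratio
# below are invented).
import pandas as pd

start_date, end_date, freq, mdyratio = '2015-01-01', '2015-01-11', '1D', 0.8
expected = (pd.to_datetime(end_date) - pd.to_datetime(start_date)) \
           / pd.to_timedelta(freq)      # 10 expected daily samples
min_data_len = expected * mdyratio      # keep stations with at least 8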
def output(self):
    '''
    Create a data wrapper containing GRACE mascon data

    @return Table data wrapper containing GRACE mascon data
    '''
    geo_point_list = self.ap_paramList[0]()
    file_list = self.cacheData('mascon', [self.mascon_url,
                                          self.scale_factor_url])

    data, metadata, lat_bounds, lon_bounds = readTellusData(
        file_list[0], geo_point_list, 'lat', 'lon', 'lwe_thickness', 'EWD',
        time_name='time', lat_bounds_name='lat_bounds',
        lon_bounds_name='lon_bounds')

    unc_data, unc_metadata = readTellusData(
        file_list[0], geo_point_list, 'lat', 'lon', 'uncertainty',
        'EWD_Error', time_name='time', lat_bounds=lat_bounds,
        lon_bounds=lon_bounds)[:2]

    scale_data, scale_metadata = readTellusData(
        file_list[1], geo_point_list, 'lat', 'lon', 'scale_factor',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]

    # Combine the uncertainty with the thickness data and attach the
    # scale factor to the metadata
    for data_name in data.keys():
        data[data_name] = pd.concat([data[data_name], unc_data[data_name]],
                                    axis=1)
        metadata[data_name]['scale_factor'] = scale_data[data_name]

    if self.start_date is not None or self.end_date is not None:
        for label in data.keys():
            if self.start_date is not None:
                data[label] = data[label][self.start_date:]
            if self.end_date is not None:
                data[label] = data[label][:self.end_date]

    return TableWrapper(data, meta_data=metadata,
                        default_columns=['Equivalent Water Thickness'],
                        default_error_columns=['EWT Uncertainty'])
def output(self):
    '''
    Generate PBO Data Wrapper

    @return PBO Data Wrapper
    '''
    self._rawData()
    if self._validInit == 1:
        return TableWrapper(self._smSet_all, meta_data=self._smHdr_all,
                            default_columns=self.default_columns,
                            default_error_columns=self.default_error_columns)
    else:
        print('... Invalid geographic region -- no stations within these bounds ...')
        return TableWrapper(dict(),
                            default_columns=self.default_columns,
                            default_error_columns=self.default_error_columns)
def output(self):
    '''
    Create data wrapper of GLDAS data for specified geopoint.

    @return GLDAS Data Wrapper
    '''
    data_file = DataFetcher.getDataLocation('gldas')
    if data_file is None:
        print("No data available")
        return None

    geo_point_list = self.ap_paramList[0]()
    gldas_data_name = 'Equivalent Water Thickness (cm)'
    full_data, metadata = readTellusData(data_file, geo_point_list,
                                         'Latitude', 'Longitude',
                                         'Water_Thickness', gldas_data_name,
                                         'Time')[:2]

    # Get appropriate time range, falling back to the full extent of
    # the data when no explicit bound was given
    if self.start_date is None or self.end_date is None:
        start_date, end_date = getStartEndDate(full_data)

    if self.start_date is not None:
        start_date = self.start_date
        if isinstance(start_date, str):
            start_date = pd.to_datetime(start_date)

    if self.end_date is not None:
        end_date = self.end_date
        if isinstance(end_date, str):
            end_date = pd.to_datetime(end_date)

    for label in full_data.keys():
        full_data[label] = full_data[label][start_date:end_date]
        # GLDAS provides no uncertainty estimate, so attach a NaN column
        gldas_unc = pd.Series(np.full(len(full_data[label]), np.nan),
                              index=full_data[label].index,
                              name="Uncertainty")
        full_data[label] = pd.concat([full_data[label], gldas_unc], axis=1)

        if self.resample:
            full_data[label] = full_data[label].reindex(
                pd.date_range(start_date, end_date))

    return TableWrapper(full_data,
                        default_columns=['Equivalent Water Thickness (cm)'],
                        default_error_columns=['Uncertainty'])
def output(self):
    '''
    Generate PBO Data Wrapper

    @return PBO Data Wrapper
    '''
    self._rawData()
    if self._validInit == 1:
        return TableWrapper(self._smSet_all, meta_data=self._smHdr_all,
                            default_columns=self.default_columns,
                            default_error_columns=self.default_error_columns)
    else:
        return TableWrapper(dict(),
                            default_columns=self.default_columns,
                            default_error_columns=self.default_error_columns)
def output(self):
    """
    Retrieve stock data

    @return TableWrapper of stock data
    """
    stock_symbols = self.ap_paramList[0]()

    timeseries_retriever = TimeSeries(
        key=DataFetcher.getConfigItem('stocks', 'api_key'),
        output_format='pandas',
        indexing_type='date')

    data_dict = OrderedDict()
    metadata_dict = OrderedDict()

    for symbol in stock_symbols:
        # Extract data
        if self.data_type == 'daily':
            data, metadata = timeseries_retriever.get_daily(
                symbol, outputsize='full')
        elif self.data_type == 'daily_adjusted':
            data, metadata = timeseries_retriever.get_daily_adjusted(
                symbol, outputsize='full')
        elif self.data_type == 'monthly':
            data, metadata = timeseries_retriever.get_monthly(symbol)
        elif self.data_type == 'monthly_adjusted':
            data, metadata = timeseries_retriever.get_monthly_adjusted(symbol)
        elif self.data_type == 'weekly':
            data, metadata = timeseries_retriever.get_weekly(symbol)
        elif self.data_type == 'weekly_adjusted':
            data, metadata = timeseries_retriever.get_weekly_adjusted(symbol)
        elif self.data_type == 'intraday':
            data, metadata = timeseries_retriever.get_intraday(
                symbol, interval=self.interval, outputsize='full')

        # Convert index to pandas datetime; intraday timestamps carry the
        # time zone reported in the metadata
        if self.data_type == 'intraday':
            data.index = pd.to_datetime(data.index).tz_localize(
                metadata['6. Time Zone'])
        else:
            data.index = pd.to_datetime(data.index)

        data_dict[symbol] = data[self.start_date:self.end_date]
        metadata_dict[symbol] = metadata

    return TableWrapper(data_dict, meta_data=metadata_dict)
def output(self):
    """
    Retrieve data from data.lacity.org

    @return Table wrapper containing the specified data
    """
    data_dict = OrderedDict()

    url_query = self.base_url_and_endpoint + urlencode(self.parameters)

    with urlopen(url_query) as remote_resource:
        raw_string = remote_resource.read().decode()

    string_data = StringIO(raw_string)
    data_dict[self.label] = pd.read_csv(string_data, **self.pandas_kwargs)

    return TableWrapper(data_dict)
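# Hedged sketch of the query construction above; the endpoint and Socrata
# parameters below are illustrative, not the fetcher's actual configuration.
from urllib.parse import urlencode

base_url_and_endpoint = 'https://data.lacity.org/resource/example-id.csv?'
parameters = {'$limit': 50000, '$offset': 0}
url_query = base_url_and_endpoint + urlencode(parameters)
# https://data.lacity.org/resource/example-id.csv?%24limit=50000&%24offset=0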
def output(self):
    '''
    Generate data wrapper

    @return Table wrapper of SDSS spectra data
    '''
    url_list = self.ap_paramList[0]()

    data_dict = OrderedDict()
    meta_dict = OrderedDict()

    for url in url_list:
        with urlopen(url) as url_data:
            bytes_data = BytesIO(url_data.read())

        with fits.open(bytes_data) as hdu_list:
            data_dict[url] = Table(hdu_list[1].data).to_pandas()
            meta_dict[url] = hdu_list[0].header

    return TableWrapper(data_dict, meta_data=meta_dict)
def output(self):
    '''
    Generate data wrapper for Mahali temperatures

    @return Mahali temperature data wrapper
    '''
    def toDateTime(in_filename):
        '''
        Extract the date from a filename (month/day/year only,
        no hours/minutes/seconds)
        '''
        return pd.to_datetime(
            pd.to_datetime(in_filename[7:25]).strftime('%Y-%m-%d'))

    # Read in file list
    mahali_temperature_info = resource_filename(
        'skdaccess', os.path.join('support', 'mahali_temperature_info.txt'))
    filenames = pd.read_csv(mahali_temperature_info, header=None,
                            names=('station', 'filename'),
                            skipinitialspace=True)

    # Create a column of dates
    filenames['date'] = filenames['filename'].apply(toDateTime)

    # Need to grab the surrounding days as well, as data can spill over
    adjusted_start_date = self.start_date - pd.to_timedelta('1d')
    adjusted_end_date = self.end_date + pd.to_timedelta('1d')

    station_list = self.ap_paramList[0]()

    # Get data for each selected station, from one day before the
    # requested start date through the requested end date
    index_to_retrieve = np.logical_and.reduce([
        filenames.loc[:, 'station'].apply(lambda x: x in station_list),
        filenames.loc[:, 'date'] >= adjusted_start_date,
        filenames.loc[:, 'date'] <= self.end_date])

    all_temperature_data = self.retrieveOnlineData(
        filenames[index_to_retrieve])

    # Due to data spillover, cut each data frame in the dictionary
    for station in all_temperature_data.keys():
        all_temperature_data[station] = \
            all_temperature_data[station].loc[adjusted_start_date:adjusted_end_date]

    # Return table wrapper of data
    return TableWrapper(all_temperature_data,
                        default_columns=['Temperature'])
def output(self):
    '''
    Output Kepler data wrapper

    @return DataWrapper
    '''
    kid_list = self.ap_paramList[0]()
    kid_list = [str(kid).zfill(9) for kid in kid_list]

    self.cacheData(kid_list)
    data_location = self._getKeplerFilePath()

    kid_data = dict()

    store = openPandasHDFStoreLocking(data_location, 'r')
    for kid in kid_list:
        kid_data[kid] = store['kid_' + kid]
        # If downloaded using an old skdaccess version, switch the index
        # from TIME to CADENCENO
        if kid_data[kid].index.name == 'TIME':
            kid_data[kid]['TIME'] = kid_data[kid].index
            kid_data[kid].set_index('CADENCENO', inplace=True)
    store.close()

    kid_data = OrderedDict(sorted(kid_data.items(), key=lambda t: t[0]))

    # If a list of quarters is specified, only select data in those quarters
    if self.quarter_list is not None:
        for kid in kid_list:
            kid_data[kid] = kid_data[kid][
                kid_data[kid]['QUARTER'].isin(self.quarter_list)]

    return TableWrapper(kid_data, default_columns=['PDCSAP_FLUX'],
                        default_error_columns=['PDCSAP_FLUX_ERR'])
def output(self):
    '''
    Generate data wrapper for Mahali TEC data

    @return Mahali data wrapper
    '''
    def generatePath(base_url, station, in_date):
        '''
        Generate the path to a file based on station, date, and base url

        @param base_url: Base url to put in front of generated url
        @param station: Name of station
        @param in_date: Date of data to create path for

        @return The url for the station data
        '''
        year = in_date.strftime('%Y')
        day = in_date.strftime('%j')
        date = in_date.strftime('%Y%m%d')

        path = 'tec/{year}/{day}/{station}-{date}.iono.gz'.format(
            year=year, day=day, station=station, date=date)

        return parse.urljoin(base_url, path)

    # Get station list
    station_list = self.ap_paramList[0]()

    # Retrieve dates containing data for the station list
    available_data_dict = retrieveCommonDatesHDF('mahali_tec_info.hdf',
                                                 station_list,
                                                 self.date_range)

    # Location of data
    base_url = 'http://apollo.haystack.mit.edu/mahali-data/'

    # Generate url list
    url_list = []
    for station, dates in available_data_dict.items():
        url_list += list(map(generatePath, repeat(base_url),
                             repeat(station), dates))

    # Cache data
    file_list = self.cacheData('mahali_tec', url_list)

    # Parse data, grouping by the station name embedded in the filename
    parsed_data_dict = defaultdict(list)
    for filename in file_list:
        station = filename[-21:-17]
        parsed_data_dict[station].append(parseIonoFile(filename))

    # Combine the data frames for each station into a single frame
    combined_data_dict = OrderedDict()
    for station, data in parsed_data_dict.items():
        combined_data_dict[station] = pd.concat(data)

    # Return data wrapper
    return TableWrapper(combined_data_dict,
                        default_columns=['vertical_tec'])
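# Hedged sketch of the path layout produced by generatePath above; the
# station name and date below are invented.
import pandas as pd
from urllib import parse

in_date = pd.Timestamp('2015-10-05')
path = 'tec/{year}/{day}/{station}-{date}.iono.gz'.format(
    year=in_date.strftime('%Y'),      # '2015'
    day=in_date.strftime('%j'),       # '278' (day of year)
    station='mh06',
    date=in_date.strftime('%Y%m%d'))  # '20151005'
url = parse.urljoin('http://apollo.haystack.mit.edu/mahali-data/', path)
# http://apollo.haystack.mit.edu/mahali-data/tec/2015/278/mh06-20151005.iono.gz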
def output(self):
    '''
    Fetch snow coverage data for coordinates

    @return Data wrapper for snow coverage
    '''
    data_file = DataFetcher.getDataLocation('imsdnhs')
    if data_file is None:
        print("No data available")
        return None

    store = pd.HDFStore(data_file)

    # Projection information
    x_start = -12288000.0
    x_end = 12288000.0
    y_start = 12288000.0
    y_end = -12288000.0
    x_dim = 6144
    y_dim = 6144
    x_inc = (x_end - x_start) / x_dim
    y_inc = (y_end - y_start) / y_dim

    proj = pyproj.Proj('+proj=stere +lat_0=90 +lat_ts=60 +lon_0=-80 +k=1 '
                       '+x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m '
                       '+no_defs')

    def convertToXY(lat, lon):
        '''
        Determine the x,y image coordinates for a given
        (latitude, longitude) pair
        '''
        ux, uy = proj(lon, lat)
        x = np.round(((ux - x_start) / x_inc) - 0.5).astype(int)
        y = np.round(((uy - y_start) / y_inc) - 0.5).astype(int)
        return (x, y)

    label_list = []
    lat_array = np.zeros(len(self.coordinate_dict), dtype=float)
    lon_array = np.zeros(len(self.coordinate_dict), dtype=float)

    for i, (label, coordinates) in enumerate(self.coordinate_dict.items()):
        label_list.append(label)
        lat_array[i] = coordinates[0]
        lon_array[i] = coordinates[1]

    x_array, y_array = convertToXY(lat_array, lon_array)

    # # Forming a complex number to remove duplicate coordinates
    # complex_array = np.unique(x_array + 1j * y_array)
    # x_array = complex_array.real
    # y_array = complex_array.imag

    data_dict = OrderedDict()
    for label, x, y in zip(label_list, x_array, y_array):
        data_dict[label] = pd.DataFrame({
            'Snow': store['y_' + str(y).zfill(4)].loc[:, x].reindex(
                pd.date_range(pd.to_datetime(self.start_date),
                              pd.to_datetime(self.end_date)),
                fill_value=-1)})

    return TableWrapper(data_dict, default_columns=['Snow'])
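# Hedged worked example of the image-coordinate math in convertToXY above:
# on this 6144x6144 grid each pixel spans 4000 m, so a point at the
# projection origin maps to pixel 3072 (the 0.5 shift centres each pixel).
import numpy as np

x_start, x_end, x_dim = -12288000.0, 12288000.0, 6144
x_inc = (x_end - x_start) / x_dim                         # 4000.0 m per pixel
ux = 0.0                                                  # projected metres at the origin
x = np.round(((ux - x_start) / x_inc) - 0.5).astype(int)  # 3072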
def output(self):
    '''
    Create data wrapper of GRACE data for specified geopoints.

    @return GRACE Data Wrapper
    '''
    conf = DataFetcher.getConfig()

    try:
        data_location = conf.get('grace', 'data_location')
        csr_filename = conf.get('grace', 'csr_filename')
        jpl_filename = conf.get('grace', 'jpl_filename')
        gfz_filename = conf.get('grace', 'gfz_filename')
        scale_factor_filename = conf.get('grace', 'scale_factor_filename')
    except (NoOptionError, NoSectionError) as exc:
        print('No data information available, please run: skdaccess grace')
        raise exc

    geo_point_list = self.ap_paramList[0]()

    csr_data, csr_meta, lat_bounds, lon_bounds = readTellusData(
        os.path.join(data_location, csr_filename), geo_point_list,
        'lat', 'lon', 'lwe_thickness', 'CSR', 'time')
    jpl_data, jpl_meta = readTellusData(
        os.path.join(data_location, jpl_filename), geo_point_list,
        'lat', 'lon', 'lwe_thickness', 'JPL', 'time',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]
    gfz_data, gfz_meta = readTellusData(
        os.path.join(data_location, gfz_filename), geo_point_list,
        'lat', 'lon', 'lwe_thickness', 'GFZ', 'time',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]
    scale_factor_data, scale_factor_meta = readTellusData(
        os.path.join(data_location, scale_factor_filename), geo_point_list,
        'Latitude', 'Longitude', 'SCALE_FACTOR',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]
    leakage_error_data, leakage_error_meta = readTellusData(
        os.path.join(data_location, scale_factor_filename), geo_point_list,
        'Latitude', 'Longitude', 'LEAKAGE_ERROR',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]
    measurement_error_data, measurement_error_meta = readTellusData(
        os.path.join(data_location, scale_factor_filename), geo_point_list,
        'Latitude', 'Longitude', 'MEASUREMENT_ERROR',
        lat_bounds=lat_bounds, lon_bounds=lon_bounds)[:2]

    # Get appropriate time range
    start_date = self.start_date
    end_date = self.end_date

    def getMaskedValue(in_value):
        '''
        Retrieve the value if not masked, otherwise return np.nan

        @param in_value: Input value to check

        @return Input value or np.nan
        '''
        if np.ma.is_masked(in_value):
            return np.nan
        else:
            return in_value

    if start_date is None or end_date is None:
        csr_start_date, csr_end_date = getStartEndDate(csr_data)
        jpl_start_date, jpl_end_date = getStartEndDate(jpl_data)
        gfz_start_date, gfz_end_date = getStartEndDate(gfz_data)

    if start_date is None:
        start_date = np.min([csr_start_date, jpl_start_date, gfz_start_date])
    if end_date is None:
        end_date = np.max([csr_end_date, jpl_end_date, gfz_end_date])

    data_dict = OrderedDict()
    metadata_dict = OrderedDict()

    for (csr_label, csr_frame), (jpl_label, jpl_frame), (gfz_label, gfz_frame) \
            in zip(csr_data.items(), jpl_data.items(), gfz_data.items()):

        data = pd.concat([csr_frame.loc[start_date:end_date],
                          jpl_frame.loc[start_date:end_date],
                          gfz_frame.loc[start_date:end_date]], axis=1)
        data.index.name = 'Date'

        label = csr_label
        metadata_dict[label] = pd.Series({
            'scale_factor': getMaskedValue(scale_factor_data[csr_label]),
            'measurement_error': getMaskedValue(
                measurement_error_data[csr_label]),
            'leakage_error': getMaskedValue(leakage_error_data[csr_label])})
        data_dict[label] = data

    metadata_frame = pd.DataFrame.from_dict(metadata_dict)

    return TableWrapper(data_dict, meta_data=metadata_frame,
                        default_columns=['CSR', 'JPL', 'GFZ'])
def output(self):
    '''
    Create data wrapper of GRACE data for specified geopoints.

    @return GRACE Data Wrapper
    '''
    conf = DataFetcher.getConfig()

    try:
        data_location = conf.get('grace', 'data_location')
        csr_filename = conf.get('grace', 'csr_filename')
        jpl_filename = conf.get('grace', 'jpl_filename')
        gfz_filename = conf.get('grace', 'gfz_filename')
        scale_factor_filename = conf.get('grace', 'scale_factor_filename')
    except (NoOptionError, NoSectionError) as exc:
        print('No data information available, please run: skdaccess grace')
        raise exc

    csr_data = read_grace_data(os.path.join(data_location, csr_filename),
                               'lat', 'lon', 'lwe_thickness', 'time')
    jpl_data = read_grace_data(os.path.join(data_location, jpl_filename),
                               'lat', 'lon', 'lwe_thickness', 'time')
    gfz_data = read_grace_data(os.path.join(data_location, gfz_filename),
                               'lat', 'lon', 'lwe_thickness', 'time')

    scale_factor = read_grace_data(
        os.path.join(data_location, scale_factor_filename),
        'Latitude', 'Longitude', 'SCALE_FACTOR')
    leakage_error = read_grace_data(
        os.path.join(data_location, scale_factor_filename),
        'Latitude', 'Longitude', 'LEAKAGE_ERROR')
    measurement_error = read_grace_data(
        os.path.join(data_location, scale_factor_filename),
        'Latitude', 'Longitude', 'MEASUREMENT_ERROR')

    geo_point_list = self.ap_paramList[0]()

    # Get appropriate time range
    start_date = self.start_date
    end_date = self.end_date

    if start_date is None:
        start_date = np.min([csr_data.items[0], jpl_data.items[0],
                             gfz_data.items[0]])
    if end_date is None:
        end_date = np.max([csr_data.items[-1], jpl_data.items[-1],
                           gfz_data.items[-1]])

    data_dict = OrderedDict()
    metadata_dict = OrderedDict()

    for geo_point in geo_point_list:
        lat = geo_point[0]
        lon = (geo_point[1] + 360) % 360

        # Grid cells are indexed by their centres on a 1-degree grid
        lat_index = floor(lat) + 0.5
        lon_index = floor(lon) + 0.5

        data = pd.DataFrame({
            'CSR': csr_data.loc[start_date:end_date, lat_index,
                                lon_index].copy(),
            'JPL': jpl_data.loc[start_date:end_date, lat_index,
                                lon_index].copy(),
            'GFZ': gfz_data.loc[start_date:end_date, lat_index,
                                lon_index].copy()})
        data.index.name = 'Date'

        label = str(geo_point[0]) + ', ' + str(geo_point[1])
        metadata_dict[label] = pd.Series({
            'scale_factor': scale_factor.loc[lat_index, lon_index],
            'measurement_error': measurement_error.loc[lat_index, lon_index],
            'leakage_error': leakage_error.loc[lat_index, lon_index]})
        data_dict[label] = data

    metadata_frame = pd.DataFrame.from_dict(metadata_dict)

    return TableWrapper(data_dict, meta_data=metadata_frame,
                        default_columns=['CSR', 'JPL', 'GFZ'])
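# Hedged sketch of the 1-degree cell-centre lookup above: longitudes are
# wrapped to [0, 360) and each point maps to the centre of the grid cell
# containing it (the coordinates below are invented).
from math import floor

lat, lon = 42.36, -71.09
lon = (lon + 360) % 360       # 288.91
lat_index = floor(lat) + 0.5  # 42.5
lon_index = floor(lon) + 0.5  # 288.5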
def output(self):
    '''
    Generate a galaxy catalog

    @return DataWrapper: Table data wrapper of galaxy catalog
    '''
    if len(self.ap_paramList) > 0:
        seed = self.ap_paramList[0]()
        np.random.seed(seed)

    # Generate a background for the whole sky, and then select galaxies
    # from that background
    num_background_galaxies = round(self.background_density * (4 * np.pi)
                                    * (180. / np.pi)**2)

    # Generate background galaxies over an entire sphere
    full_ra = np.random.rand(num_background_galaxies) * 360.
    full_dec = np.arccos(2 * np.random.rand(num_background_galaxies) - 1) \
               * 180. / np.pi - 90.

    # Make a data frame and select the galaxies in our box
    galaxies = pd.DataFrame.from_dict({'RA': full_ra, 'Dec': full_dec})
    galaxies = galaxies[np.logical_and.reduce((galaxies['RA'] > self.ra1,
                                               galaxies['RA'] < self.ra2,
                                               galaxies['Dec'] > self.dec1,
                                               galaxies['Dec'] < self.dec2))]
    galaxies['Cluster_ID'] = -1

    # Now generate a cluster
    num_galaxies = 100
    cluster_ra = np.mean([self.ra1, self.ra2])
    cluster_dec = np.mean([self.dec1, self.dec2])

    cosmo = FlatLambdaCDM(self.__H0, self.__Omega_m)
    distance = cosmo.comoving_distance(self.z).to('Mpc').value

    radial_positions = self.inverse_nfw_cumulative(
        np.random.rand(num_galaxies))
    bearings = np.random.rand(num_galaxies) * 360.

    # Convert radial comoving distances to angular separations
    angular_distances = radial_positions / distance * 180. / np.pi

    # Generate the RA and Dec for each galaxy by moving it in a random
    # direction the radial distance away from the center
    cluster_ra, cluster_dec = move_point(cluster_ra, cluster_dec,
                                         angular_distances, bearings)

    cluster_galaxies = pd.DataFrame.from_dict({'RA': cluster_ra,
                                               'Dec': cluster_dec})
    cluster_galaxies['Cluster_ID'] = 0

    galaxies = pd.concat([galaxies, cluster_galaxies], axis=0).set_index(
        np.arange(len(galaxies) + len(cluster_galaxies)))

    return TableWrapper({'Cluster_Catalog_01': galaxies},
                        default_columns=['RA', 'Dec'])
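# Hedged sketch of the uniform-on-a-sphere draw above: sampling cos(dec)
# uniformly on [-1, 1) and inverting avoids bunching points at the poles.
import numpy as np

u = np.random.rand(1000)
dec = np.arccos(2 * u - 1) * 180. / np.pi - 90.  # declinations in [-90, 90]
ra = np.random.rand(1000) * 360.                 # right ascensions in [0, 360)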
def output(self):
    '''
    Create data wrapper of GLDAS data for specified geopoint.

    @return GLDAS Data Wrapper
    '''
    data_file = DataFetcher.getDataLocation('gldas')
    if data_file is None:
        print("No data available")
        return None

    geo_point_list = self.ap_paramList[0]()

    full_data = read_grace_data(data_file, 'Latitude', 'Longitude',
                                'Water_Thickness', 'Time')

    # Get appropriate time range, defaulting to the full time axis
    # of the data
    start_date = self.start_date
    end_date = self.end_date

    if start_date is None:
        start_date = full_data.items[0]
    elif isinstance(start_date, str):
        start_date = pd.to_datetime(start_date)

    if end_date is None:
        end_date = full_data.items[-1]
    elif isinstance(end_date, str):
        end_date = pd.to_datetime(end_date)

    data = full_data[start_date:end_date]

    data_dict = OrderedDict()
    for geo_point in geo_point_list:
        lat = geo_point[0]
        lon = (geo_point[1] + 360) % 360

        # Map the point to the centre of its 1-degree grid cell
        lat_index = round(lat - (lat % 1)) + 0.5
        lon_index = round(lon - (lon % 1)) + 0.5

        gldas_data = data.loc[:, lat_index, lon_index]
        gldas_data.name = 'Equivalent Water Thickness (cm)'
        # GLDAS provides no uncertainty estimate, so attach a NaN column
        gldas_unc = pd.Series(np.full(len(gldas_data), np.nan),
                              index=gldas_data.index, name="Uncertainty")
        gldas_data = pd.concat([gldas_data, gldas_unc], axis=1)

        if self.resample:
            gldas_data = gldas_data.reindex(pd.date_range(start_date,
                                                          end_date))

        label = str(geo_point[0]) + ', ' + str(geo_point[1])
        data_dict[label] = gldas_data

    return TableWrapper(data_dict,
                        default_columns=['Equivalent Water Thickness (cm)'],
                        default_error_columns=['Uncertainty'])