def _parse_hdus(cls, hdulist):
    """Parses a LYRA HDU list from a FITS file."""
    fits_record = hdulist[1].data

    # Build the metadata. Different LYRA FITS files use different keywords
    # for the observation date ('date-obs' or 'date_obs').
    metadata = MetaDict(OrderedDict(hdulist[0].header))
    start_str = metadata.get('date-obs', metadata.get('date_obs', ''))
    start = parse_time(start_str)

    # The first column contains times. For level 2 data the unit is seconds;
    # for level 3 data it is minutes.
    if hdulist[1].header['TUNIT1'] == 's':
        times = [start + datetime.timedelta(seconds=n)
                 for n in fits_record.field(0)]
    elif hdulist[1].header['TUNIT1'] == 'MIN':
        times = [start + datetime.timedelta(minutes=int(n))
                 for n in fits_record.field(0)]
    else:
        raise ValueError("Time unit in LYRA fits file not recognised. "
                         "Value = {0}".format(hdulist[1].header['TUNIT1']))

    # The remaining columns hold the data.
    table = {}
    for i, col in enumerate(fits_record.columns[1:-1]):
        # Temporary patch for big-endian data bug on pandas 0.13
        if fits_record.field(i + 1).dtype.byteorder == '>' and sys.byteorder == 'little':
            table[col.name] = fits_record.field(i + 1).byteswap().newbyteorder()
        else:
            table[col.name] = fits_record.field(i + 1)

    # Return the data, metadata and units
    data = pandas.DataFrame(table, index=times)
    data.sort_index(inplace=True)

    units = OrderedDict([('CHANNEL1', u.W / u.m**2),
                         ('CHANNEL2', u.W / u.m**2),
                         ('CHANNEL3', u.W / u.m**2),
                         ('CHANNEL4', u.W / u.m**2)])
    # ToDo: check: http://www.wmo-sat.info/oscar/instruments/view/733
    return data, metadata, units
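# A minimal usage sketch for the LYRA parser above. The filename is hypothetical,
# and this assumes the classmethod lives on sunpy's LYRATimeSeries as usual; in
# practice sunpy.timeseries.TimeSeries(filepath, source='LYRA') calls it for you.
from astropy.io import fits

with fits.open('lyra_20170101-000000_lev2_std.fits') as hdulist:  # hypothetical file
    data, metadata, units = LYRATimeSeries._parse_hdus(hdulist)
    print(data.columns, units['CHANNEL1'])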
def __call__(self, *args, **kwargs):
    """ Method for running the factory. Takes arbitrary arguments and
    keyword arguments and passes them to a sequence of pre-registered types
    to determine which is the correct Map type to build.

    Arguments args and kwargs are passed through to the validation
    function and to the constructor for the final type. For Map types,
    the validation function must take a data-header pair as an argument.

    Parameters
    ----------
    composite : boolean, optional
        Indicates if collection of maps should be returned as a CompositeMap
    cube : boolean, optional
        Indicates if collection of maps should be returned as a MapCube
    silence_errors : boolean, optional
        If set, ignore data-header pairs which cause an exception.

    Notes
    -----
    Extra keyword arguments are passed through to `sunpy.io.read_file` such
    as `memmap` for FITS files.
    """
    # Hack to get around Python 2.x not backporting PEP 3102.
    composite = kwargs.pop('composite', False)
    cube = kwargs.pop('cube', False)
    silence_errors = kwargs.pop('silence_errors', False)

    data_header_pairs, already_maps = self._parse_args(*args, **kwargs)

    new_maps = list()

    # Loop over each registered type and check to see if WidgetType
    # matches the arguments. If it does, use that type.
    for pair in data_header_pairs:
        data, header = pair
        meta = MetaDict(header)

        try:
            new_map = self._check_registered_widgets(data, meta, **kwargs)
        except (NoMatchError, MultipleMatchError, ValidationFunctionError):
            if not silence_errors:
                raise
            # Skip this pair; appending would otherwise reference an
            # undefined (or stale) new_map.
            continue

        new_maps.append(new_map)

    new_maps += already_maps

    # If the list is meant to be a cube, instantiate a map cube
    if cube:
        return MapCube(new_maps, **kwargs)

    # If the list is meant to be a composite map, instantiate one
    if composite:
        return CompositeMap(new_maps, **kwargs)

    if len(new_maps) == 1:
        return new_maps[0]

    return new_maps
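# How the factory __call__ above is typically invoked by a user. The filenames
# are hypothetical; composite and cube are the keyword arguments documented in
# the docstring.
import sunpy.map

single_map = sunpy.map.Map('aia_171.fits')
comp_map = sunpy.map.Map('aia_171.fits', 'aia_193.fits', composite=True)
map_cube = sunpy.map.Map('aia_171_*.fits', cube=True)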
def _parse_level_0cs(filepath):
    """Parses an EVE Level 0CS file."""
    is_missing_data = False  # boolean to check for missing data
    missing_data_val = np.nan
    header = []
    fields = []
    with codecs.open(filepath, mode='rb', encoding='ascii') as fp:
        line = fp.readline()
        # Read header at top of file
        while line.startswith(";"):
            header.append(line)
            if '; Missing data:' in line:
                is_missing_data = True
                missing_data_val = line.split(':')[1].strip()
            line = fp.readline()

    meta = MetaDict()
    for hline in header:
        if hline == '; Format:\n' or hline == '; Column descriptions:\n':
            continue
        elif ('Created' in hline) or ('Source' in hline):
            meta[hline.split(':', 1)[0].replace(';', ' ').strip()] = hline.split(':', 1)[1].strip()
        elif ':' in hline:
            meta[hline.split(':')[0].replace(';', ' ').strip()] = hline.split(':')[1].strip()

    fieldnames_start = False
    for hline in header:
        if hline.startswith("; Format:"):
            fieldnames_start = False
        if fieldnames_start:
            fields.append(hline.split(":")[0].replace(';', ' ').strip())
        if hline.startswith("; Column descriptions:"):
            fieldnames_start = True

    # Next line is YYYY DOY MM DD
    date_parts = line.split(" ")
    year = int(date_parts[0])
    month = int(date_parts[2])
    day = int(date_parts[3])

    data = read_csv(filepath, delim_whitespace=True, names=fields,
                    comment=';', dtype={'HHMM': int})
    # First data row repeats YYYY DOY MM DD, so drop it
    data = data.iloc[1:, :]
    data['Hour'] = data['HHMM'] // 100
    data['Minute'] = data['HHMM'] % 100
    data = data.drop(['HHMM'], axis=1)
    data['Year'] = year
    data['Month'] = month
    data['Day'] = day

    datecols = ['Year', 'Month', 'Day', 'Hour', 'Minute']
    data['Time'] = to_datetime(data[datecols])
    data = data.set_index('Time')
    data = data.drop(datecols, axis=1)

    if is_missing_data:  # If missing data specified in header
        data[data == float(missing_data_val)] = np.nan

    # Add the units data
    units = OrderedDict([('XRS-B proxy', u.W / u.m**2),
                         ('XRS-A proxy', u.W / u.m**2),
                         ('SEM proxy', u.W / u.m**2),
                         ('0.1-7ESPquad', u.W / u.m**2),
                         ('17.1ESP', u.W / u.m**2),
                         ('25.7ESP', u.W / u.m**2),
                         ('30.4ESP', u.W / u.m**2),
                         ('36.6ESP', u.W / u.m**2),
                         ('darkESP', u.ct),
                         ('121.6MEGS-P', u.W / u.m**2),
                         ('darkMEGS-P', u.ct),
                         ('q0ESP', u.dimensionless_unscaled),
                         ('q1ESP', u.dimensionless_unscaled),
                         ('q2ESP', u.dimensionless_unscaled),
                         ('q3ESP', u.dimensionless_unscaled),
                         ('CMLat', u.deg),
                         ('CMLon', u.deg)])
    # Todo: check units used.
    return data, meta, units
def _parse_level_0cs(fp):
    """Parses an EVE Level 0CS file."""
    is_missing_data = False  # boolean to check for missing data
    missing_data_val = np.nan
    header = []
    fields = []
    line = fp.readline()
    # Read header at top of file
    while line.startswith(";"):
        header.append(line)
        if '; Missing data:' in line:
            is_missing_data = True
            missing_data_val = line.split(':')[1].strip()
        line = fp.readline()

    meta = MetaDict()
    for hline in header:
        if hline == '; Format:\n' or hline == '; Column descriptions:\n':
            continue
        elif ('Created' in hline) or ('Source' in hline):
            meta[hline.split(':', 1)[0].replace(';', ' ').strip()] = hline.split(':', 1)[1].strip()
        elif ':' in hline:
            meta[hline.split(':')[0].replace(';', ' ').strip()] = hline.split(':')[1].strip()

    fieldnames_start = False
    for hline in header:
        if hline.startswith("; Format:"):
            fieldnames_start = False
        if fieldnames_start:
            fields.append(hline.split(":")[0].replace(';', ' ').strip())
        if hline.startswith("; Column descriptions:"):
            fieldnames_start = True

    # Next line is YYYY DOY MM DD
    date_parts = line.split(" ")
    year = int(date_parts[0])
    month = int(date_parts[2])
    day = int(date_parts[3])

    def parser(x):
        # Parse date column (HHMM)
        return datetime(year, month, day, int(x[0:2]), int(x[2:4]))

    data = read_csv(fp, sep=r"\s+", names=fields, index_col=0,
                    date_parser=parser, header=None, engine='python')

    if is_missing_data:  # If missing data specified in header
        data[data == float(missing_data_val)] = np.nan

    # Add the units data
    units = OrderedDict([('XRS-B proxy', u.W/u.m**2), ('XRS-A proxy', u.W/u.m**2),
                         ('SEM proxy', u.W/u.m**2), ('0.1-7ESPquad', u.W/u.m**2),
                         ('17.1ESP', u.W/u.m**2), ('25.7ESP', u.W/u.m**2),
                         ('30.4ESP', u.W/u.m**2), ('36.6ESP', u.W/u.m**2),
                         ('darkESP', u.ct), ('121.6MEGS-P', u.W/u.m**2),
                         ('darkMEGS-P', u.ct),
                         ('q0ESP', u.dimensionless_unscaled),
                         ('q1ESP', u.dimensionless_unscaled),
                         ('q2ESP', u.dimensionless_unscaled),
                         ('q3ESP', u.dimensionless_unscaled),
                         ('CMLat', u.deg), ('CMLon', u.deg)])
    # Todo: check units used.
    return data, meta, units
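# Usage sketch for the file-object variant of the Level 0CS parser above. The
# filename is hypothetical; normally TimeSeries(filepath, source='EVE') handles
# this for you.
import codecs

with codecs.open('LATEST_EVE_L0CS_DIODES_1m.txt', mode='rb', encoding='ascii') as fp:
    data, meta, units = _parse_level_0cs(fp)
print(units['XRS-B proxy'], data.index[0])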
def synthetic_magnetogram(bottom_left_coord, top_right_coord, shape: u.pixel,
                          centers, sigmas: u.arcsec, amplitudes: u.Gauss,
                          observer=None):
    """
    Compute synthetic magnetogram using 2D Gaussian "sunspots"

    Parameters
    ----------
    bottom_left_coord : `~astropy.coordinates.SkyCoord`
        Bottom left corner
    top_right_coord : `~astropy.coordinates.SkyCoord`
        Top right corner
    shape : `~astropy.units.Quantity`
        Dimensionality of the magnetogram
    centers : `~astropy.coordinates.SkyCoord`
        Center coordinates of flux concentration
    sigmas : `~astropy.units.Quantity`
        Standard deviation of flux concentration with shape `(N, 2)`, with `N`
        the number of flux concentrations
    amplitudes : `~astropy.units.Quantity`
        Amplitude of flux concentration with shape `(N,)`
    observer : `~astropy.coordinates.SkyCoord`, optional
        Defaults to Earth observer at current time
    """
    time_now = astropy.time.Time.now()
    if observer is None:
        observer = sunpy.coordinates.ephemeris.get_earth(time=time_now)
    # Transform to HPC frame
    bottom_left_coord = bottom_left_coord.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    top_right_coord = top_right_coord.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    # Setup array
    delta_x = (top_right_coord.Tx - bottom_left_coord.Tx).to(u.arcsec)
    delta_y = (top_right_coord.Ty - bottom_left_coord.Ty).to(u.arcsec)
    dx = delta_x / shape[0]
    dy = delta_y / shape[1]
    data = np.zeros((int(shape[1].value), int(shape[0].value)))
    xphysical, yphysical = np.meshgrid(np.arange(shape[0].value) * shape.unit * dx,
                                       np.arange(shape[1].value) * shape.unit * dy)
    # Add sunspots
    centers = centers.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    for c, s, a in zip(centers, sigmas, amplitudes):
        xc_2 = (xphysical - (c.Tx - bottom_left_coord.Tx)).to(u.arcsec).value**2.0
        yc_2 = (yphysical - (c.Ty - bottom_left_coord.Ty)).to(u.arcsec).value**2.0
        data += a.to(u.Gauss).value * np.exp(
            -xc_2 / (2 * s[0].to(u.arcsec).value**2)
            - yc_2 / (2 * s[1].to(u.arcsec).value**2))
    # Build metadata
    meta = MetaDict({
        'telescop': 'synthetic_magnetic_imager',
        'instrume': 'synthetic_magnetic_imager',
        'detector': 'synthetic_magnetic_imager',
        'bunit': 'Gauss',
        'ctype1': 'HPLN-TAN',
        'ctype2': 'HPLT-TAN',
        'hgln_obs': observer.transform_to('heliographic_stonyhurst').lon.to(u.deg).value,
        'hglt_obs': observer.transform_to('heliographic_stonyhurst').lat.to(u.deg).value,
        'cunit1': 'arcsec',
        'cunit2': 'arcsec',
        'crpix1': (shape[0].value + 1) / 2.,
        'crpix2': (shape[1].value + 1) / 2.,
        'cdelt1': dx.value,
        'cdelt2': dy.value,
        'crval1': ((bottom_left_coord.Tx + top_right_coord.Tx) / 2.).to(u.arcsec).value,
        'crval2': ((bottom_left_coord.Ty + top_right_coord.Ty) / 2.).to(u.arcsec).value,
        'dsun_obs': observer.transform_to('heliographic_stonyhurst').radius.to(u.m).value,
        'dsun_ref': observer.transform_to('heliographic_stonyhurst').radius.to(u.m).value,
        'rsun_ref': const.R_sun.to(u.m).value,
        'rsun_obs': ((const.R_sun
                      / observer.transform_to('heliographic_stonyhurst').radius
                      ).decompose() * u.radian).to(u.arcsec).value,
        't_obs': time_now.iso,
        'date-obs': time_now.iso,
    })
    return GenericMap(data, meta)
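# Usage sketch for synthetic_magnetogram above; all coordinate values, the
# shape, sigmas and amplitudes are illustrative.
import astropy.units as u
from astropy.coordinates import SkyCoord
import sunpy.coordinates

obs = sunpy.coordinates.ephemeris.get_earth('2020-01-01')
hpc = sunpy.coordinates.Helioprojective(observer=obs)
blc = SkyCoord(Tx=-150 * u.arcsec, Ty=-150 * u.arcsec, frame=hpc)
trc = SkyCoord(Tx=150 * u.arcsec, Ty=150 * u.arcsec, frame=hpc)
spots = SkyCoord(Tx=[0, 50] * u.arcsec, Ty=[0, -40] * u.arcsec, frame=hpc)
m = synthetic_magnetogram(blc, trc, (256, 256) * u.pixel, spots,
                          sigmas=[[10, 10], [15, 8]] * u.arcsec,
                          amplitudes=[1e3, -1e3] * u.Gauss, observer=obs)
m.peek()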
def seas_metadict(sea_locations): return MetaDict(sea_locations)
class InstrumentBase(object): """ Base class for instruments. This object is not meant to be instantiated directly. Instead, specific instruments should subclass this base object and implement a `calculate_intensity_kernel` method for that specific instrument. Parameters ---------- observing_time : `~astropy.units.Quantity` Tuple of start and end observing times observer_coordinate : `~astropy.coordinates.SkyCoord` Coordinate of the observing instrument assumed_cross_section : `~astropy.units.Quantity`, optional Approximation of the loop cross-section. This defines the filling factor. pad_fov : `~astropy.units.Quantity`, optional Two-dimensional array specifying the padding to apply to the field of view of the synthetic image in both directions. If None, no padding is applied and the field of view is defined by the maximal extent of the loop coordinates in each direction. """ fits_template = MetaDict() @u.quantity_input def __init__(self, observing_time: u.s, observer, assumed_cross_section=1e14 * u.cm**2, pad_fov=None): self.observing_time = np.arange(*observing_time.to('s').value, self.cadence.to('s').value) * u.s self.observer = observer.transform_to(HeliographicStonyhurst) self.assumed_cross_section = assumed_cross_section self.pad_fov = (0, 0) * u.arcsec if pad_fov is None else pad_fov def calculate_intensity_kernel(self, *args, **kwargs): """ Converts emissivity for a particular transition to counts per detector channel. When writing a new instrument class, this method should be overridden. """ raise NotImplementedError('No detect method implemented.') def los_velocity(self, v_x, v_y, v_z): """ Compute the LOS velocity for the instrument observer """ # NOTE: transform from HEEQ to HCC with respect to the instrument observer Phi_0 = self.observer.lon.to(u.radian) B_0 = self.observer.lat.to(u.radian) v_los = v_z * np.sin(B_0) + v_x * np.cos(B_0) * np.cos( Phi_0) + v_y * np.cos(B_0) * np.sin(Phi_0) # NOTE: Negative sign to be consistent with convention v_los > 0 away from observer return -v_los @property def projected_frame(self): return Helioprojective(observer=self.observer, obstime=self.observer.obstime) @property @u.quantity_input def pixel_area(self) -> u.cm**2: """ Pixel area """ w_x, w_y = (1 * u.pix * self.resolution).to( u.radian).value * self.observer.radius return w_x * w_y def convolve_with_psf(self, data): # TODO: do the convolution here! return data def observe(self, skeleton, save_directory, channels=None, **kwargs): """ Calculate the time dependent intensity for all loops and project them along the line-of-sight as defined by the instrument observer. 
Parameters ---------- """ if channels is None: channels = self.channels client = distributed.get_client() coordinates = skeleton.all_coordinates coordinates_centers = skeleton.all_coordinates_centers for channel in channels: kernels = client.map(self.calculate_intensity_kernel, skeleton.loops, channel=channel, **kwargs) kernels_interp = client.map(self.interpolate_to_instrument_time, kernels, skeleton.loops, observing_time=self.observing_time) files = client.map(self.write_kernel_to_file, kernels_interp, skeleton.loops, channel=channel, name=self.name) # NOTE: block here to avoid pileup of tasks that can overwhelm the scheduler distributed.wait(files) for i, t in enumerate(self.observing_time): m = self.integrate_los(t, channel, skeleton, coordinates, coordinates_centers) m = self.convolve_with_psf(m) m.save(os.path.join(save_directory, f'm_{channel.name}_t{i}.fits'), overwrite=True) @staticmethod def write_kernel_to_file(kernel, loop, channel, name): root = zarr.open(loop.model_results_filename, 'a') if name not in root[loop.name]: root[loop.name].create_group(name) ds = root[f'{loop.name}/{name}'].create_dataset( channel.name, data=kernel.value, chunks=(None, ) + kernel.shape[:1], overwrite=True, ) ds.attrs['unit'] = kernel.unit.to_string() @staticmethod def interpolate_to_instrument_time(kernel, loop, observing_time): """ Interpolate the intensity kernel from the simulation time to the cadence of the instrument for the desired observing window. """ time = loop.time if time.shape == (1, ): if time != observing_time: raise ValueError( 'Model and observing times are not equal for a single model time step.' ) return kernel f_t = interp1d(time.to(observing_time.unit).value, kernel.value, axis=0, fill_value='extrapolate') return f_t(observing_time.value) * kernel.unit def integrate_los(self, time, channel, skeleton, coordinates, coordinates_centers): client = distributed.get_client() # Get Coordinates coords = coordinates_centers.transform_to(self.projected_frame) # Compute weights i_time = np.where(time == self.observing_time)[0][0] widths = np.concatenate( [l.field_aligned_coordinate_width for l in skeleton.loops]) loop_area = np.concatenate( [l.cross_sectional_area for l in skeleton.loops]) root = skeleton.loops[0].zarr_root # NOTE: do this outside of the client.map call to make Dask happy path = f'{{}}/{self.name}/{channel.name}' kernels = np.concatenate( client.gather( client.map( lambda l: root[path.format(l.name)][i_time, :], skeleton.loops, ))) unit_kernel = u.Unit( root[f'{skeleton.loops[0].name}/{self.name}/{channel.name}']. attrs['unit']) area_ratio = (loop_area / self.pixel_area).decompose() weights = area_ratio * widths * (kernels * unit_kernel) visible = is_visible(coords, self.observer) # Bin bins, (blc, trc) = self.get_detector_array(coordinates) hist, _, _ = np.histogram2d( coords.Tx.value, coords.Ty.value, bins=bins, range=((blc.Tx.value, trc.Tx.value), (blc.Ty.value, trc.Ty.value)), weights=weights.value * visible, ) header = self.get_header(channel, coordinates) header['bunit'] = weights.unit.decompose().to_string() header['date-obs'] = (self.observer.obstime + time).isot return Map(hist.T, header) def get_header(self, channel, coordinates): """ Create the FITS header for a given channel and set of loop coordinates that define the needed FOV. 
""" bins, bin_range = self.get_detector_array(coordinates) header = make_fitswcs_header( (bins[1], bins[0]), # swap order because it expects (row,column) bin_range[ 0], # align with the lower left corner of the lower left pixel reference_pixel=(-0.5, -0.5) * u.pixel, # center of the lower left pixel is (0,0) scale=self.resolution, instrument=f'{self.detector}_{channel.telescope_number}', telescope=self.telescope, wavelength=channel.channel, ) return header def get_detector_array(self, coordinates): """ Calculate the number of pixels in the detector FOV and the physical coordinates of the bottom left and top right corners. """ coordinates = coordinates.transform_to(self.projected_frame) # NOTE: this is the coordinate of the bottom left corner of the bottom left corner pixel, # NOT the coordinate at the center of the pixel! bottom_left_corner = SkyCoord( Tx=coordinates.Tx.min() - self.pad_fov[0], Ty=coordinates.Ty.min() - self.pad_fov[1], frame=coordinates.frame) bins_x = int( np.ceil((coordinates.Tx.max() + self.pad_fov[0] - bottom_left_corner.Tx) / self.resolution[0]).value) bins_y = int( np.ceil((coordinates.Ty.max() + self.pad_fov[1] - bottom_left_corner.Ty) / self.resolution[1]).value) # Compute right corner after the fact to account for rounding in bin numbers # NOTE: this is the coordinate of the top right corner of the top right corner pixel, NOT # the coordinate at the center of the pixel! top_right_corner = SkyCoord( Tx=bottom_left_corner.Tx + self.resolution[0] * bins_x * u.pixel, Ty=bottom_left_corner.Ty + self.resolution[1] * bins_y * u.pixel, frame=coordinates.frame) return (bins_x, bins_y), (bottom_left_corner, top_right_corner)
def test_invalid_manual_data():
    meta = MetaDict({'key': 'value'})
    data = []
    with pytest.raises(NoMatchError):
        sunpy.timeseries.TimeSeries(data, meta)
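# For contrast with the failing case above, a minimal sketch of valid manual
# input to the TimeSeries factory: a time-indexed DataFrame plus a MetaDict
# and a units mapping.
import numpy as np
import pandas as pd
import astropy.units as u

index = pd.date_range('2020-01-01', periods=10, freq='T')
frame = pd.DataFrame({'intensity': np.arange(10.0)}, index=index)
ts = sunpy.timeseries.TimeSeries(frame, MetaDict({'key': 'value'}),
                                 {'intensity': u.W / u.m**2})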
def generate_jedi_catalog( threshold_time_prior_flare_minutes=240.0, dimming_window_relative_to_flare_minutes_left=0.0, dimming_window_relative_to_flare_minutes_right=240.0, threshold_minimum_dimming_window_minutes=120.0, flare_index_range=range(0, 5052), output_path='/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/', verbose=True): """Wrapper code for creating James's Extreme Ultraviolet Variability Experiment (EVE) Dimming Index (JEDI) catalog. Inputs: None. Optional Inputs: threshold_time_prior_flare_minutes [float]: How long before a particular event does the last one need to have occurred to be considered independent. If the previous one was too recent, will use that event's pre-flare irradiance. Default is 240 (4 hours). dimming_window_relative_to_flare_minutes_left [float]: Defines the left side of the time window to search for dimming relative to the GOES/XRS flare peak. Negative numbers mean minutes prior to the flare peak. Default is 0.0. dimming_window_relative_to_flare_minutes_right [float]: Defines the right side of the time window to search for dimming relative to the GOES/XRS flare peak. If another flare occurs before this, that time will define the end of the window instead. Default is 240 (4 hours). threshold_minimum_dimming_window_minutes [float]: The smallest allowed time window in which to search for dimming. Default is 120. flare_index_range [range] The range of GOES flare indices to process. Default is range(0, 5052). output_path [str]: Set to a path for saving the JEDI catalog table and processing summary plots. Default is '/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/'. verbose [bool]: Set to log the processing messages to disk and console. Default is False. Outputs: No direct return, but writes a (csv? sql table? hdf5?) to disk with the dimming paramerization results. Subroutines also optionally save processing plots to disk in output_path. 
Optional Outputs: None Example: generate_jedi_catalog(output_path='/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/', verbose=True) """ # Prepare the logger for verbose if verbose: logger = JpmLogger(filename='generate_jedi_catalog', path=output_path, console=False) logger.info("Starting JEDI processing pipeline.") logger.info("Processing events {0} - {1}".format( flare_index_range[0], flare_index_range[-1])) else: logger = None # Get EVE level 2 extracted emission lines data # TODO: Replace this shortcut method with the method I'm building into sunpy from scipy.io.idl import readsav eve_readsav = readsav( '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav' ) if verbose: logger.info('Loaded EVE data') # Create metadata dictionary # TODO: Replace this shortcut method with the method I'm building into sunpy from sunpy.util.metadata import MetaDict metadata = MetaDict() metadata['ion'] = eve_readsav['name'] metadata['temperature_ion_peak_formation'] = np.power( 10.0, eve_readsav['logt']) * u.Kelvin metadata['extracted_wavelength_center'] = eve_readsav['wavelength'] * u.nm metadata['extracted_wavelength_min'] = metadata[ 'extracted_wavelength_center'] metadata['extracted_wavelength_max'] = metadata[ 'extracted_wavelength_center'] metadata['emission_line_blends'] = ['none', 'yay', 'poop', 'Fe vi'] # etc metadata[ 'exposure_time'] = 60.0 * u.second # These example EVE data are already binned down to 1 minute metadata['precision'] = ['Not implemented in prototype'] metadata['accuracy'] = ['Not implemented in prototype'] metadata['flags'] = ['Not implemented in prototype'] metadata['flags_description'] = '1 = MEGS-A data is missing, ' \ '2 = MEGS-B data is missing, ' \ '4 = ESP data is missing, ' \ '8 = MEGS-P data is missing, ' \ '16 = Possible clock adjust in MEGS-A, ' \ '32 = Possible clock adjust in MEGS-B, ' \ '64 = Possible clock adjust in ESP, ' \ '128 = Possible clock adjust in MEGS-P' metadata['flags_spacecraft'] = ['Not implemented in prototype'] metadata['flags_spacecraft_description'] = '0 = No obstruction, ' \ '1 = Warm up from Earth eclipse, ' \ '2 = Obstruction atmosphere penumbra, ' \ '3 = Obstruction atmosphere umbra, ' \ '4 = Obstruction penumbra of Mercury, ' \ '5 = Obstruction penumbra of Mercury, ' \ '6 = Obstruction penumbra of Venus, ' \ '7 = Obstruction umbra of Venus, ' \ '8 = Obstruction penumbra of Moon, ' \ '9 = Obstruction umbra of Moon, ' \ '10 = Obstruction penumbra of solid Earth, ' \ '11 = Obstruction umbra of solid Earth, ' \ '16 = Observatory is off-pointed by more than 1 arcmin' metadata['data_version'] = ['Not implemented in prototype'] metadata['data_reprocessed_revision'] = ['Not implemented in prototype'] metadata['filename'] = ['Not implemented in prototype'] # Load up the actual irradiance data into a pandas DataFrame # TODO: Replace this shortcut method with the method I'm building into sunpy irradiance = eve_readsav['irradiance'].byteswap().newbyteorder( ) # pandas doesn't like big endian irradiance[irradiance == -1] = np.nan wavelengths = eve_readsav['wavelength'] wavelengths_str = [] [ wavelengths_str.append('{0:1.1f}'.format(wavelength)) for wavelength in wavelengths ] eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str) eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str)) eve_lines = eve_lines.drop_duplicates() # slice out only columns needed by Shawn # eve_selected_lines = 
eve_lines.drop(columns=['9.4', '13.1', '13.3', '25.6', '28.4', '30.4', '33.5', '36.1', '36.8', '44.6', '46.5', '49.9', '52.1', '52.6', '53.7', '55.4', '56.8', '58.4', '59.2', '60.0', '61.0', '62.5', '63.0', '71.9', '72.2', '77.0', '79.0', '83.6', '95.0', '97.3', '97.7', '102.6', '103.2']) # eve_selected_lines.info() # eve_selected_lines.to_csv('/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/eve_selected_lines_forreal.csv') # Get GOES flare events above C1 within date range corresponding to EVE data # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1], verbose=verbose) # TODO: The method in sunpy needs fixing, issue 2434 # Load GOES events from IDL saveset instead of directly through sunpy goes_flare_events = readsav( '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/GoesEventsMegsAEra.sav' ) goes_flare_events['class'] = goes_flare_events['class'].astype(str) goes_flare_events['event_peak_time_human'] = goes_flare_events[ 'event_peak_time_human'].astype(str) goes_flare_events['event_start_time_human'] = goes_flare_events[ 'event_start_time_human'].astype(str) goes_flare_events['peak_time'] = Time( goes_flare_events['event_peak_time_jd'], format='jd', scale='utc') goes_flare_events['start_time'] = Time( goes_flare_events['event_start_time_jd'], format='jd', scale='utc') if verbose: logger.info('Loaded GOES flare events.') # Define the columns of the JEDI catalog jedi_row = pd.DataFrame([ OrderedDict([('Event #', np.nan), ('GOES Flare Start Time', np.nan), ('GOES Flare Peak Time', np.nan), ('GOES Flare Class', np.nan), ('Pre-Flare Start Time', np.nan), ('Pre-Flare End Time', np.nan), ('Flare Interrupt', np.nan)]) ]) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Pre-Flare Irradiance [W/m2]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Min [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Max [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Mean [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Uncertainty [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth Uncertainty [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Fitting Gamma')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Fitting Score')) ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2)) ion_permutations = pd.Index( [' by '.join(ion_tuples[i]) for i in range(len(ion_tuples))]) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Min [%/s]')) jedi_row = jedi_row.join( 
pd.DataFrame(columns=ion_permutations + ' Slope Max [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Mean [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Uncertainty [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth Uncertainty [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Correction Time Shift [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Correction Scale Factor')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Fitting Gamma')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Fitting Score')) csv_filename = output_path + 'jedi_{0}.csv'.format(Time.now().iso) jedi_row.to_csv(csv_filename, header=True, index=False, mode='w') if verbose: logger.info('Created JEDI row definition.') # Start a progress bar widgets = [ progressbar.Percentage(), progressbar.Bar(), progressbar.Timer(), ' ', progressbar.AdaptiveETA() ] progress_bar = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Flare Event Loop: ')] + widgets, min_value=flare_index_range[0], max_value=flare_index_range[-1]).start() # Prepare a hold-over pre-flare irradiance value, # which will normally have one element for each of the 39 emission lines preflare_irradiance = np.nan # Start loop through all flares for flare_index in flare_index_range: # Skip event 0 to avoid problems with referring to earlier indices if flare_index == 0: continue # Reset jedi_row jedi_row[:] = np.nan # Reset the flare interrupt flag flare_interrupt = False # Fill the GOES flare information into the JEDI row jedi_row['Event #'] = flare_index jedi_row['GOES Flare Start Time'] = goes_flare_events['start_time'][ flare_index].iso jedi_row['GOES Flare Peak Time'] = goes_flare_events['peak_time'][ flare_index].iso jedi_row['GOES Flare Class'] = goes_flare_events['class'][flare_index] if verbose: logger.info( "Event {0} GOES flare details stored to JEDI row.".format( flare_index)) # If haven't already done all pre-parameterization processing processed_jedi_non_params_filename = output_path + 'Processed Pre-Parameterization Data/Event {0} Pre-Parameterization.h5'.format( flare_index) processed_lines_filename = output_path + 'Processed Lines Data/Event {0} Lines.h5'.format( flare_index) if not os.path.isfile(processed_lines_filename) or not os.path.isfile( processed_jedi_non_params_filename): # Determine pre-flare irradiance minutes_since_last_flare = ( goes_flare_events['peak_time'][flare_index] - goes_flare_events['peak_time'][flare_index - 1]).sec / 60.0 if minutes_since_last_flare > threshold_time_prior_flare_minutes: # Clip EVE data from threshold_time_prior_flare_minutes prior to flare up to peak flare time preflare_window_start = ( goes_flare_events['peak_time'][flare_index] - (threshold_time_prior_flare_minutes * u.minute)).iso preflare_window_end = ( goes_flare_events['peak_time'][flare_index]).iso eve_lines_preflare_time = eve_lines[ preflare_window_start:preflare_window_end] # Loop through the emission lines and get pre-flare irradiance 
for each preflare_irradiance = [] for column in eve_lines_preflare_time: eve_line_preflare_time = pd.DataFrame( eve_lines_preflare_time[column]) eve_line_preflare_time.columns = ['irradiance'] preflare_irradiance.append( determine_preflare_irradiance( eve_line_preflare_time, pd.Timestamp(goes_flare_events['start_time'] [flare_index].iso), plot_path_filename=output_path + 'Preflare Determination/Event {0} {1}.png'.format( flare_index, column), verbose=verbose, logger=logger)) plt.close('all') else: logger.info( "This flare at {0} will use the pre-flare irradiance from flare at {1}." .format( goes_flare_events['peak_time'][flare_index].iso, goes_flare_events['peak_time'][flare_index - 1].iso)) jedi_row["Pre-Flare Start Time"] = preflare_window_start jedi_row["Pre-Flare End Time"] = preflare_window_end preflare_irradiance_cols = [ col for col in jedi_row.columns if 'Pre-Flare Irradiance' in col ] jedi_row[preflare_irradiance_cols] = preflare_irradiance if verbose: logger.info( "Event {0} pre-flare determination complete.".format( flare_index)) # Clip EVE data to dimming window bracket_time_left = ( goes_flare_events['peak_time'][flare_index] - (dimming_window_relative_to_flare_minutes_left * u.minute)) next_flare_time = Time( (goes_flare_events['peak_time'][flare_index + 1]).iso) user_choice_time = ( goes_flare_events['peak_time'][flare_index] + (dimming_window_relative_to_flare_minutes_right * u.minute)) bracket_time_right = min(next_flare_time, user_choice_time) # If flare is shortening the window, set the flare_interrupt flag if bracket_time_right == next_flare_time: flare_interrupt = True if verbose: logger.info( 'Flare interrupt for event at {0} by flare at {1}'. format(goes_flare_events['peak_time'][flare_index].iso, next_flare_time)) # Write flare_interrupt to JEDI row jedi_row['Flare Interrupt'] = flare_interrupt # Skip event if the dimming window is too short if ((bracket_time_right - bracket_time_left).sec / 60.0) < threshold_minimum_dimming_window_minutes: # Leave all dimming parameters as NaN and write this null result to the CSV on disk jedi_row.to_csv(csv_filename, header=False, index=False, mode='a') # Log message if verbose: logger.info( 'The dimming window duration of {0} minutes is shorter than the minimum threshold of {1} minutes. Skipping this event ({2})' .format(((bracket_time_right - bracket_time_left).sec / 60.0), threshold_minimum_dimming_window_minutes, goes_flare_events['peak_time'][flare_index])) # Skip the rest of the processing in the flare_index loop continue else: eve_lines_event = eve_lines[bracket_time_left. iso:bracket_time_right.iso] if verbose: logger.info( "Event {0} EVE data clipped to dimming window.".format( flare_index)) # Convert irradiance units to percent # (in place, don't care about absolute units from this point forward) eve_lines_event = (eve_lines_event - preflare_irradiance ) / preflare_irradiance * 100.0 if verbose: logger.info( "Event {0} irradiance converted from absolute to percent units." 
.format(flare_index)) # Do flare removal in the light curves and add the results to the DataFrame progress_bar_correction = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Peak Match Subtract: ')] + widgets, max_value=len(ion_tuples)).start() for i in range(len(ion_tuples)): light_curve_to_subtract_from_df = pd.DataFrame( eve_lines_event[ion_tuples[i][0]]) light_curve_to_subtract_from_df.columns = ['irradiance'] light_curve_to_subtract_with_df = pd.DataFrame( eve_lines_event[ion_tuples[i][1]]) light_curve_to_subtract_with_df.columns = ['irradiance'] if (light_curve_to_subtract_from_df.isnull().all().all()) or ( light_curve_to_subtract_with_df.isnull().all().all()): if verbose: logger.info( 'Event {0} {1} correction skipped because all irradiances are NaN.' .format(flare_index, ion_permutations[i])) else: light_curve_corrected, seconds_shift, scale_factor = light_curve_peak_match_subtract( light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, pd.Timestamp( (goes_flare_events['peak_time'][flare_index]).iso), plot_path_filename=output_path + 'Peak Subtractions/Event {0} {1}.png'.format( flare_index, ion_permutations[i]), verbose=verbose, logger=logger) eve_lines_event[ ion_permutations[i]] = light_curve_corrected jedi_row[ion_permutations[i] + ' Correction Time Shift [s]'] = seconds_shift jedi_row[ion_permutations[i] + ' Correction Scale Factor'] = scale_factor plt.close('all') if verbose: logger.info( 'Event {0} flare removal correction complete'. format(flare_index)) progress_bar_correction.update(i) progress_bar_correction.finish() # TODO: Update calculate_eve_fe_line_precision to compute for all emission lines, not just selected uncertainty = np.ones(len(eve_lines_event)) * 0.002545 # TODO: Propagate uncertainty through light_curve_peak_match_subtract and store in eve_lines_event # Fit the light curves to reduce influence of noise on the parameterizations to come later progress_bar_fitting = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Light curve fitting: ')] + widgets, max_value=len(eve_lines_event.columns)).start() for i, column in enumerate(eve_lines_event): if eve_lines_event[column].isnull().all().all(): if verbose: logger.info( 'Event {0} {1} fitting skipped because all irradiances are NaN.' 
.format(flare_index, column)) else: eve_line_event = pd.DataFrame(eve_lines_event[column]) eve_line_event.columns = ['irradiance'] eve_line_event['uncertainty'] = uncertainty fitting_path = output_path + 'Fitting/' if not os.path.exists(fitting_path): os.makedirs(fitting_path) plt.close('all') light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve( eve_line_event, plots_save_path='{0} Event {1} {2} '.format( fitting_path, flare_index, column), verbose=verbose, logger=logger) eve_lines_event[column] = light_curve_fit jedi_row[column + ' Fitting Gamma'] = best_fit_gamma jedi_row[column + ' Fitting Score'] = best_fit_score if verbose: logger.info( 'Event {0} {1} light curves fitted.'.format( flare_index, column)) progress_bar_fitting.update(i) progress_bar_fitting.finish() # # Save the dimming event data to disk for quicker restore # jedi_row.to_hdf(processed_jedi_non_params_filename, 'jedi_row') # eve_lines_event.to_hdf(processed_lines_filename, 'eve_lines_event') # else: # jedi_row = pd.read_hdf(processed_jedi_non_params_filename, 'jedi_row') # eve_lines_event = pd.read_hdf(processed_lines_filename, 'eve_lines_event') # if verbose: # logger.info('Loading files {0} and {1} rather than processing again.'.format(processed_jedi_non_params_filename, processed_lines_filename)) # # # Parameterize the light curves for dimming # for column in eve_lines_event: # # # Null out all parameters # depth_percent, depth_time = np.nan, np.nan # slope_start_time, slope_end_time = np.nan, np.nan # slope_min, slope_max, slope_mean = np.nan, np.nan, np.nan # duration_seconds, duration_start_time, duration_end_time = np.nan, np.nan, np.nan # # # Determine whether to do the parameterizations or not # if eve_lines_event[column].isnull().all().all(): # if verbose: # logger.info('Event {0} {1} parameterization skipped because all irradiances are NaN.'.format(flare_index, column)) # else: # eve_line_event = pd.DataFrame(eve_lines_event[column]) # eve_line_event.columns = ['irradiance'] # # # Determine dimming depth (if any) # depth_path = output_path + 'Depth/' # if not os.path.exists(depth_path): # os.makedirs(depth_path) # # plt.close('all') # depth_percent, depth_time = determine_dimming_depth(eve_line_event, # plot_path_filename='{0} Event {1} {2} Depth.png'.format(depth_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Depth [%]'] = depth_percent # # jedi_row[column + ' Depth Uncertainty [%]'] = depth_uncertainty # TODO: make determine_dimming_depth return the propagated uncertainty # jedi_row[column + ' Depth Time'] = depth_time # # # Determine dimming slope (if any) # slope_path = output_path + 'Slope/' # if not os.path.exists(slope_path): # os.makedirs(slope_path) # # slope_start_time = pd.Timestamp((goes_flare_events['peak_time'][flare_index]).iso) # slope_end_time = depth_time # # if (pd.isnull(slope_start_time)) or (pd.isnull(slope_end_time)): # if verbose: # logger.warning('Cannot compute slope or duration because slope bounding times NaN.') # else: # plt.close('all') # slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event, # earliest_allowed_time=slope_start_time, # latest_allowed_time=slope_end_time, # plot_path_filename='{0} Event {1} {2} Slope.png'.format(slope_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Slope Min [%/s]'] = slope_min # jedi_row[column + ' Slope Max [%/s]'] = slope_max # jedi_row[column + ' Slope Mean [%/s]'] = slope_mean # # jedi_row[column + ' Slope Uncertainty [%]'] = 
slope_uncertainty # TODO: make determine_dimming_depth return the propagated uncertainty # jedi_row[column + ' Slope Start Time'] = slope_start_time # jedi_row[column + ' Slope End Time'] = slope_end_time # # # Determine dimming duration (if any) # duration_path = output_path + 'Duration/' # if not os.path.exists(duration_path): # os.makedirs(duration_path) # # plt.close('all') # duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event, # earliest_allowed_time=slope_start_time, # plot_path_filename='{0} Event {1} {2} Duration.png'.format(duration_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Duration [s]'] = duration_seconds # jedi_row[column + ' Duration Start Time'] = duration_start_time # jedi_row[column + ' Duration End Time'] = duration_end_time # # if verbose: # logger.info("Event {0} {1} parameterizations complete.".format(flare_index, column)) # # # Produce a summary plot for each light curve # plt.style.use('jpm-transparent-light') # # ax = eve_line_event['irradiance'].plot(color='black') # plt.axhline(linestyle='dashed', color='grey') # start_date = eve_line_event.index.values[0] # start_date_string = pd.to_datetime(str(start_date)) # plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S')) # plt.ylabel('Irradiance [%]') # fmtr = dates.DateFormatter("%H:%M:%S") # ax.xaxis.set_major_formatter(fmtr) # ax.xaxis.set_major_locator(dates.HourLocator()) # plt.title('Event {0} {1} nm Parameters'.format(flare_index, column)) # # if not np.isnan(depth_percent): # plt.annotate('', xy=(depth_time, -depth_percent), xycoords='data', # xytext=(depth_time, 0), textcoords='data', # arrowprops=dict(facecolor='limegreen', edgecolor='limegreen', linewidth=2)) # mid_depth = -depth_percent / 2.0 # plt.annotate('{0:.2f} %'.format(depth_percent), xy=(depth_time, mid_depth), xycoords='data', # ha='right', va='center', rotation=90, size=18, color='limegreen') # # if not np.isnan(slope_mean): # if pd.isnull(slope_start_time) or pd.isnull(slope_end_time): # import pdb # pdb.set_trace() # p = plt.plot(eve_line_event[slope_start_time:slope_end_time]['irradiance'], c='tomato') # # inverse_str = '$^{-1}$' # plt.annotate('slope_min={0} % s{1}'.format(latex_float(slope_min), inverse_str), # xy=(0.98, 0.12), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # plt.annotate('slope_max={0} % s{1}'.format(latex_float(slope_max), inverse_str), # xy=(0.98, 0.08), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # plt.annotate('slope_mean={0} % s{1}'.format(latex_float(slope_mean), inverse_str), # xy=(0.98, 0.04), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # # if not np.isnan(duration_seconds): # plt.annotate('', xy=(duration_start_time, 0), xycoords='data', # xytext=(duration_end_time, 0), textcoords='data', # arrowprops=dict(facecolor='dodgerblue', edgecolor='dodgerblue', linewidth=5, arrowstyle='<->')) # mid_time = duration_start_time + (duration_end_time - duration_start_time) / 2 # plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18, color='dodgerblue') # # summary_path = output_path + 'Summary Plots/' # if not os.path.exists(summary_path): # os.makedirs(summary_path) # summary_filename = '{0} Event {1} {2} Parameter Summary.png'.format(summary_path, flare_index, column) # plt.savefig(summary_filename) # if verbose: # logger.info("Summary plot saved to %s" % summary_filename) # # # Write to the 
JEDI catalog on disk # jedi_row.to_csv(csv_filename, header=False, index=False, mode='a') # if verbose: # logger.info('Event {0} JEDI row written to {1}.'.format(flare_index, csv_filename)) # Update progress bar progress_bar.update(flare_index) progress_bar.finish()
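# Invocation sketch for generate_jedi_catalog above. The output path and index
# range are placeholders; the hard-coded IDL saveset paths inside the function
# must also exist for this to run.
generate_jedi_catalog(flare_index_range=range(1, 100),
                      output_path='/tmp/jedi_catalog/',
                      verbose=True)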
class InstrumentBase(object): """ Base class for instruments. Need to at least implement a detect() method that is used by the `Observer` class to get the detector counts. Parameters ---------- observing_time : `~astropy.units.Quantity` Tuple of start and end observing times observer_coordinate : `~astropy.coordinates.SkyCoord` Coordinate of the observing instrument """ fits_template = MetaDict() @u.quantity_input def __init__(self, observing_time: u.s, observer_coordinate): self.observing_time = np.arange(observing_time[0].to( u.s).value, observing_time[1].to(u.s).value, self.cadence.value) * u.s self.observer_coordinate = observer_coordinate def detect(self, *args, **kwargs): """ Converts emissivity for a particular transition to counts per detector channel. When writing a new instrument class, this method should be overridden. """ raise NotImplementedError('No detect method implemented.') def build_detector_file(self, file_template, dset_shape, chunks, *args, **kwargs): """ Allocate space for counts data. """ dset_names = [ 'density', 'electron_temperature', 'ion_temperature', 'velocity_x', 'velocity_y', 'velocity_z' ] dset_names += kwargs.get('additional_fields', []) self.counts_file = file_template.format(self.name) with h5py.File(self.counts_file, 'a') as hf: if 'time' not in hf: dset = hf.create_dataset('time', data=self.observing_time.value) dset.attrs['units'] = self.observing_time.unit.to_string() for dn in dset_names: if dn not in hf: hf.create_dataset(dn, dset_shape, chunks=chunks) @property def total_coordinates(self): """ Helioprojective coordinates for all loops for the instrument observer """ if not hasattr(self, 'counts_file'): raise AttributeError( f'''No counts file found for {self.name}. Build it first using Observer.build_detector_files''') with h5py.File(self.counts_file, 'r') as hf: total_coordinates = u.Quantity(hf['coordinates'], hf['coordinates'].attrs['units']) coords = SkyCoord(x=total_coordinates[:, 0], y=total_coordinates[:, 1], z=total_coordinates[:, 2], frame=HeliographicStonyhurst, representation='cartesian') # This extra transform-to is due to a bug where to convert out of an HEEQ frame # one must first transform to a polar HGS frame # FIXME: once this is fixed upstream in SunPy, this can be removed return coords.transform_to(HeliographicStonyhurst).transform_to( Helioprojective(observer=self.observer_coordinate)) def los_velocity(self, v_x, v_y, v_z): """ Compute the LOS velocity for the instrument observer """ # NOTE: transform from HEEQ to HCC with respect to the instrument observer obs = self.observer_coordinate.transform_to(HeliographicStonyhurst) Phi_0, B_0 = obs.lon.to(u.radian), obs.lat.to(u.radian) v_los = v_z * np.sin(B_0) + v_x * np.cos(B_0) * np.cos( Phi_0) + v_y * np.cos(B_0) * np.sin(Phi_0) # NOTE: Negative sign to be consistent with convention v_los > 0 away from observer return -v_los def interpolate_and_store(self, y, loop, interp_s, start_index=None, save_dir=False, dset_name=None): """ Interpolate in time and space and write to HDF5 file. 
""" if type(y) is str: y = getattr(loop, y) f_s = interp1d(loop.field_aligned_coordinate.value, y.value, axis=1, kind='linear') y_s = f_s(interp_s) if loop.time.shape == (1, ): # If static case, no need to interpolate in time # But require that the observing and loop times are the same assert np.all(loop.time == self.observing_time) interpolated_y = y_s else: f_t = interp1d(loop.time.value, y_s, axis=0, kind='linear', fill_value='extrapolate') interpolated_y = f_t(self.observing_time.value) if save_dir: save_path = os.path.join( save_dir, f'{loop.name}_{self.name}_{dset_name}.pkl') with open(save_path, 'wb') as f: pickle.dump((interpolated_y, y.unit.to_string(), start_index, dset_name), f) return save_path else: return interpolated_y * y.unit @staticmethod def assemble_arrays(interp_files, savefile): """ Assemble interpolated results into single file """ with h5py.File(savefile, 'a', driver=None) as hf: for filename in interp_files: with open(filename, 'rb') as f: y, units, start_index, dset_name = pickle.load(f) tmp = u.Quantity(y, units) InstrumentBase.commit(tmp, hf[dset_name], start_index) return interp_files @staticmethod def commit(y, dset, start_index): if 'units' not in dset.attrs: dset.attrs['units'] = y.unit.to_string() dset[:, start_index:(start_index + y.shape[1])] = y.value @staticmethod def generic_2d_histogram(counts_filename, dset_name, i_time, bins, bin_range): """ Turn flattened quantity into 2D weighted histogram """ with h5py.File(counts_filename, 'r') as hf: weights = np.array(hf[dset_name][i_time, :]) units = u.Unit(hf[dset_name].attrs['units']) coordinates = np.array(hf['coordinates'][:, :2]) hc, _ = np.histogramdd(coordinates, bins=bins[:2], range=bin_range[:2]) h, _ = np.histogramdd(coordinates, bins=bins[:2], range=bin_range[:2], weights=weights) h /= np.where(hc == 0, 1, hc) return h.T * units def make_fits_header(self, field, channel): """ Build up FITS header with relevant instrument information. """ min_x, max_x, min_y, max_y = self._get_fov(field.magnetogram) bins, _ = self.make_detector_array(field) fits_header = MetaDict() fits_header['crval1'] = (min_x + (max_x - min_x) / 2).value fits_header['crval2'] = (min_y + (max_y - min_y) / 2).value fits_header['cunit1'] = self.total_coordinates.Tx.unit.to_string() fits_header['cunit2'] = self.total_coordinates.Ty.unit.to_string() fits_header['hglt_obs'] = self.observer_coordinate.lat.to(u.deg).value fits_header['hgln_obs'] = self.observer_coordinate.lon.to(u.deg).value fits_header['ctype1'] = 'HPLN-TAN' fits_header['ctype2'] = 'HPLT-TAN' fits_header['date-obs'] = field.magnetogram.meta['date-obs'] fits_header['dsun_obs'] = self.observer_coordinate.radius.to(u.m).value fits_header['rsun_obs'] = ( (constants.radius / (self.observer_coordinate.radius - constants.radius)).decompose() * u.radian).to(u.arcsec).value fits_header['cdelt1'] = self.resolution.x.value fits_header['cdelt2'] = self.resolution.y.value fits_header['crpix1'] = (bins.x.value + 1.0) / 2.0 fits_header['crpix2'] = (bins.y.value + 1.0) / 2.0 if 'instrument_label' in channel: fits_header['instrume'] = channel['instrument_label'] if 'wavelength' in channel: fits_header['wavelnth'] = channel['wavelength'].value # Anything that needs to be overridden in a subclass can be put in the fits template fits_header.update(self.fits_template) return fits_header def _get_fov(self, ar_map): """ Find the field of view, taking into consideration the corners of the original AR map and the loop coordinates in HPC. 
""" # Check magnetogram FOV left_corner = (ar_map.bottom_left_coord.transform_to( HeliographicStonyhurst).transform_to( Helioprojective(observer=self.observer_coordinate))) right_corner = (ar_map.top_right_coord.transform_to( HeliographicStonyhurst).transform_to( Helioprojective(observer=self.observer_coordinate))) # Set bounds to include all loops and original magnetogram FOV (with some padding) loop_coords = self.total_coordinates if 'gaussian_width' in self.channels[0]: width_max = u.Quantity( [c['gaussian_width']['x'] for c in self.channels]).max() pad_x = self.resolution.x * width_max width_max = u.Quantity( [c['gaussian_width']['y'] for c in self.channels]).max() pad_y = self.resolution.y * width_max else: pad_x = self.resolution.x * 1 * u.pixel pad_y = self.resolution.y * 1 * u.pixel min_x = min(loop_coords.Tx.min(), left_corner.Tx) - pad_x max_x = max(loop_coords.Tx.max(), right_corner.Tx) + pad_x min_y = min(loop_coords.Ty.min(), left_corner.Ty) - pad_y max_y = max(loop_coords.Ty.max(), right_corner.Ty) + pad_y return min_x, max_x, min_y, max_y def make_detector_array(self, field): """ Construct bins based on desired observing area. """ # Get field of view min_x, max_x, min_y, max_y = self._get_fov(field.magnetogram) min_z = self.total_coordinates.distance.min() max_z = self.total_coordinates.distance.max() delta_x = max_x - min_x delta_y = max_y - min_y bins_x = np.ceil(delta_x / self.resolution.x) bins_y = np.ceil(delta_y / self.resolution.y) bins_z = max(bins_x, bins_y) # NOTE: the z-quantities are used to determine the integration step along the LOS bins = SpatialPair(x=bins_x, y=bins_y, z=bins_z) bin_range = SpatialPair(x=u.Quantity([min_x, max_x]), y=u.Quantity([min_y, max_y]), z=u.Quantity([min_z, max_z])) return bins, bin_range
def _parse_file(cls, filepath):
    """Parses a NOAA indices CSV file."""
    header = []
    with open(filepath, 'r') as fp:
        line = fp.readline()
        # Read header at top of file
        while line.startswith((":", "#")):
            header += line
            line = fp.readline()
        fields = ('yyyy', 'mm', 'sunspot SWO', 'sunspot RI', 'sunspot ratio',
                  'sunspot SWO smooth', 'sunspot RI smooth', 'radio flux',
                  'radio flux smooth', 'geomagnetic ap', 'geomagnetic smooth')
        data = read_csv(fp, delim_whitespace=True, names=fields, comment='#',
                        dtype={'yyyy': np.str, 'mm': np.str})
        data = data.dropna(how='any')
        timeindex = [datetime.datetime.strptime(x + y, '%Y%m')
                     for x, y in zip(data['yyyy'], data['mm'])]
        data['time'] = timeindex
        data = data.set_index('time')
        data = data.drop('mm', 1)
        data = data.drop('yyyy', 1)

        # Add the units data
        units = OrderedDict([('sunspot SWO', u.dimensionless_unscaled),
                             ('sunspot RI', u.dimensionless_unscaled),
                             ('sunspot ratio', u.dimensionless_unscaled),
                             ('sunspot SWO smooth', u.dimensionless_unscaled),
                             ('sunspot RI smooth', u.dimensionless_unscaled),
                             ('radio flux', u.W / u.m**2),
                             ('radio flux smooth', u.W / u.m**2),
                             ('geomagnetic ap', u.dimensionless_unscaled),
                             ('geomagnetic smooth', u.dimensionless_unscaled)])
        # Todo: check units
        # Todo: fix header/meta, it's returning rubbish.
        return data, MetaDict({'comments': header}), units
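# Usage sketch: the NOAA indices parser above is normally reached through the
# TimeSeries factory. The filename is hypothetical.
import sunpy.timeseries

ts_noaa = sunpy.timeseries.TimeSeries('RecentIndices.txt', source='NOAAIndices')
print(ts_noaa.units['sunspot RI'])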
def test_init_with_invalid_keycomments_type():
    """
    Ensure attempt to initialise with an invalid keycomments type is rejected.
    """
    with pytest.raises(TypeError):
        MetaDict({'a': 1, 'b': 2, 'keycomments': 3})
def get_header(fn, hdu=0):
    with fn as fi:
        return MetaDict(sunpy.io.fits.get_header(fi)[hdu])
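# Usage sketch for the get_header helper above inside a test; the fixture name
# and the keyword checked are illustrative.
def test_get_header_returns_metadict(aia_fits_handle):  # hypothetical fixture
    header = get_header(aia_fits_handle, hdu=0)
    assert isinstance(header, MetaDict)
    assert 'telescop' in header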
def _parse_args(self, *args, **kwargs): """ Parses an args list for data-header pairs. args can contain any mixture of the following entries: * tuples of (data, header, unit) (1) * data, header not in a tuple (1) * filename, which will be read * directory, from which all files will be read * glob, from which all files will be read * url, which will be downloaded and read * lists containing any of the above. (1) Note that header/unit are optional and in either order, but data but be the first entry in each group. Example ------- self._parse_args(data, header, (data, header), ['file1', 'file2', 'file3'], 'file4', 'directory1', '*.fits') """ data_header_unit_tuples = list() data_header_pairs = list() already_timeseries = list() filepaths = list() # Take source kwarg if defined source = kwargs.get('source', None) # Account for nested lists of items. Simply outputs a single list of # items, nested lists are expanded to element level. args = expand_list(args) # For each of the arguments, handle each of the cases i = 0 while i < len(args): arg = args[i] # Data-header pair in a tuple if (isinstance(arg, (np.ndarray, Table, pd.DataFrame))):# and self._validate_meta(args[i+1])): # Assume a Pandas Dataframe is given data = arg units = OrderedDict() meta = MetaDict() # Convert the data argument into a Pandas DataFrame if needed. if isinstance(data, Table): # We have an AstroPy Table: data, meta, units = self._from_table(data) elif isinstance(data, np.ndarray): # We have a numpy ndarray. We assume the first column is a dt index data = pd.DataFrame(data=data[:,1:], index=Time(data[:,0])) # If there are 1 or 2 more arguments: for _ in range(2): if (len(args) > i+1): # If that next argument isn't data but is metaddata or units: if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)): if self._validate_units(args[i+1]): units.update(args[i+1]) i += 1 # an extra increment to account for the units elif self._validate_meta(args[i+1]): # if we have an astropy.io FITS header then convert # to preserve multi-line comments if isinstance(args[i+1], astropy.io.fits.header.Header): args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1])) meta.update(args[i+1]) i += 1 # an extra increment to account for the meta # Add a 3-tuple for this TimeSeries. 
            data_header_unit_tuples.append((data, meta, units))

        # Filepath
        elif (isinstance(arg, six.string_types) and
              os.path.isfile(os.path.expanduser(arg))):

            path = os.path.expanduser(arg)
            read, result = self._read_file(path, **kwargs)

            if read:
                data_header_pairs.append(result)
            else:
                filepaths.append(result)

        # Directory
        elif (isinstance(arg, six.string_types) and
              os.path.isdir(os.path.expanduser(arg))):

            path = os.path.expanduser(arg)
            files = [os.path.join(path, elem) for elem in os.listdir(path)]
            for afile in files:
                # returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Glob
        elif isinstance(arg, six.string_types) and '*' in arg:

            files = glob.glob(os.path.expanduser(arg))

            for afile in files:
                # data_header_unit_tuples += self._read_file(afile, **kwargs)
                # returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Already a TimeSeries
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)

        # A URL
        elif isinstance(arg, six.string_types) and _is_url(arg):
            default_dir = sunpy.config.get("downloads", "download_dir")
            url = arg
            path = download_file(url, default_dir)
            pairs = self._read_file(path, **kwargs)
            # data_header_pairs += pairs
            filepaths.append(pairs[1])
        else:
            # raise ValueError("File not found or invalid input")
            raise NoMatchError("File not found or invalid input")
        i += 1

    # TODO:
    # In the end, if there are already TimeSeries it should be put in the
    # same order as the input, currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
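# Hedged usage sketch (not from the source): the docstring above lists the
# argument forms _parse_args accepts; from the public factory these can be
# mixed freely in one call. The filenames, directory and glob below are
# illustrative placeholders only.
import numpy as np
import pandas as pd
import sunpy.timeseries

frame = pd.DataFrame({'intensity': np.arange(10)},
                     index=pd.date_range('2017-01-01', periods=10, freq='min'))

# A bare DataFrame, a single file, a directory and a glob are each routed by
# the corresponding branch of _parse_args; the result is a list of TimeSeries.
ts_list = sunpy.timeseries.TimeSeries(frame,
                                      'file1.fits',
                                      'eve_data_dir/',
                                      'goes_*.fits')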
ts_goes.to_array()
# Note: the array doesn't include the datetime index column.

##############################################################################
# Creating a TimeSeries from scratch can be done in a lot of ways, much like a
# Map.
# Input data can be in the form of a Pandas DataFrame (preferred), an astropy
# Table or a Numpy Array.

# To generate some data and the corresponding dates
base = datetime.datetime.today()
dates = Time(base) - TimeDelta(np.arange(24 * 60) * u.minute)
intensity = np.sin(np.arange(0, 12 * np.pi, ((12 * np.pi) / (24 * 60))))

# Create the data DataFrame, header MetaDict and units OrderedDict
data = DataFrame(intensity, index=dates, columns=['intensity'])
units = OrderedDict([('intensity', u.W / u.m**2)])
meta = MetaDict({'key': 'value'})

# Create the time series
ts_custom = sunpy.timeseries.TimeSeries(data, meta, units)

# A more manual dataset would be a numpy array, which we can create using:
tm = Time(['2000:002', '2001:345', '2002:345'])
a = [1, 4, 5]
b = [2.0, 5.0, 8.2]
c = ['x', 'y', 'z']
arr = np.stack([tm, a, b, c], axis=1)
# Note: this array needs to have the times in the first column; these can be
# in any form that can be converted using astropy.time.Time().

# We can use the array directly:
ts_from_arr = sunpy.timeseries.TimeSeries(arr, {})
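##############################################################################
# Hedged sketch (not from the source): the comments above also mention an
# astropy Table as an accepted input form. One way to build one, reusing the
# tm, a, b and c columns defined just above (column names are illustrative):
from astropy.table import Table

table = Table([tm, a, b, c], names=['time', 'a', 'b', 'c'])
# The time column comes first, so the factory can use it as the index.
ts_from_table = sunpy.timeseries.TimeSeries(table, {})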
def __call__(self, *args, **kwargs):
    """
    Method for running the factory. Takes arbitrary arguments and
    keyword arguments and passes them to a sequence of pre-registered types
    to determine which is the correct TimeSeries source type to build.

    Arguments args and kwargs are passed through to the validation
    function and to the constructor for the final type. For TimeSeries
    types, validation function must take a data-header pair as an argument.

    Parameters
    ----------
    silence_errors : `bool`, optional
        If set, ignore data-header pairs which cause an exception.

    Notes
    -----
    Extra keyword arguments are passed through to `sunpy.io.read_file` such
    as `memmap` for FITS files.
    """
    # Hack to get around Python 2.x not backporting PEP 3102.
    silence_errors = kwargs.pop('silence_errors', False)

    (data_header_unit_tuples, data_header_pairs,
     already_timeseries, filepaths) = self._parse_args(*args, **kwargs)

    new_timeseries = list()

    # The filepaths for unreadable files
    for filepath in filepaths:
        try:
            new_ts = self._check_registered_widgets(filepath=filepath, **kwargs)
        except (NoMatchError, MultipleMatchError, ValidationFunctionError):
            if not silence_errors:
                raise
        except:
            raise

        new_timeseries.append(new_ts)

    # data_header_pairs is a list of HDUs as read by sunpy.io
    # For each set of HDUs find the matching class and read the
    # data_header_unit_tuples by calling the _parse_hdus method
    # of the class.
    for pairs in data_header_pairs:
        # Pairs may be x long where x is the number of HDUs in the file.
        headers = [pair.header for pair in pairs]

        types = []
        for header in headers:
            try:
                match = self._get_matching_widget(meta=header, **kwargs)
                if not match == GenericTimeSeries:
                    types.append(match)
            except (MultipleMatchError, NoMatchError):
                continue

        if not types:
            # If no specific classes have been found we can read the data
            # if we only have one data header pair:
            if len(pairs) == 1:
                already_timeseries.append(GenericTimeSeries(pairs[0].data,
                                                            pairs[0].header))
                # Handled generically; skip the class-specific parsing below.
                continue
            else:
                raise NoMatchError("Input read by sunpy.io can not find a "
                                   "matching class for reading multiple HDUs")
        if len(set(types)) > 1:
            raise MultipleMatchError("Multiple HDUs return multiple matching classes.")

        cls = types[0]

        data_header_unit_tuples.append(cls._parse_hdus(pairs))

    # Loop over each registered type and check to see if WidgetType
    # matches the arguments. If it does, use that type
    for triple in data_header_unit_tuples:
        data, header, units = triple
        # Make a MetaDict from various input types
        meta = header
        if isinstance(meta, astropy.io.fits.header.Header):
            meta = sunpy.io.header.FileHeader(meta)
        meta = MetaDict(meta)

        try:
            new_ts = self._check_registered_widgets(data=data, meta=meta,
                                                    units=units, **kwargs)
        except (NoMatchError, MultipleMatchError, ValidationFunctionError):
            if not silence_errors:
                raise
        except:
            raise

        new_timeseries.append(new_ts)

    new_timeseries += already_timeseries

    # Concatenate the timeseries into one if specified.
    concatenate = kwargs.get('concatenate', False)
    if concatenate:
        # Merge all these timeseries into one.
        full_timeseries = new_timeseries.pop(0)
        for timeseries in new_timeseries:
            full_timeseries = full_timeseries.concatenate(timeseries)

        new_timeseries = [full_timeseries]

    # Sanitize any units OrderedDict details
    for timeseries in new_timeseries:
        timeseries._sanitize_units()

    # Only return single time series, not in a list if we only have one.
    if len(new_timeseries) == 1:
        return new_timeseries[0]
    return new_timeseries
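# Hedged usage sketch (not from the source): the `concatenate` keyword handled
# near the end of __call__ merges several files into a single TimeSeries.
# The GOES filenames below are illustrative placeholders.
import sunpy.timeseries

ts_day1 = sunpy.timeseries.TimeSeries('go1520120601.fits')
ts_both = sunpy.timeseries.TimeSeries('go1520120601.fits', 'go1520120602.fits',
                                      concatenate=True)
# Without concatenate=True the same call would return a list of TimeSeries.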
def test_init_with_illegal_arg():
    """
    Ensure attempt to initialise with a nonsensical data structure is rejected.
    """
    with pytest.raises(TypeError):
        MetaDict({'a', 'b', 'c', 'd'})
def __call__(self, *args, composite=False, sequence=False, silence_errors=False, **kwargs):
    """
    Method for running the factory. Takes arbitrary arguments and
    keyword arguments and passes them to a sequence of pre-registered types
    to determine which is the correct Map-type to build.

    Arguments args and kwargs are passed through to the validation
    function and to the constructor for the final type. For Map types,
    validation function must take a data-header pair as an argument.

    Parameters
    ----------
    composite : `bool`, optional
        Indicates if collection of maps should be returned as a `~sunpy.map.CompositeMap`.
        Default is ``False``.
    sequence : `bool`, optional
        Indicates if collection of maps should be returned as a `sunpy.map.MapSequence`.
        Default is ``False``.
    silence_errors : `bool`, optional
        If set, ignore data-header pairs which cause an exception.
        Default is ``False``.

    Notes
    -----
    Extra keyword arguments are passed through to `sunpy.io.read_file` such
    as `memmap` for FITS files.
    """
    data_header_pairs = self._parse_args(*args, silence_errors=silence_errors, **kwargs)
    new_maps = list()

    # Loop over each registered type and check to see if WidgetType
    # matches the arguments. If it does, use that type.
    for pair in data_header_pairs:
        if isinstance(pair, GenericMap):
            new_maps.append(pair)
            continue
        data, header = pair
        meta = MetaDict(header)

        try:
            new_map = self._check_registered_widgets(data, meta, **kwargs)
            new_maps.append(new_map)
        except (NoMatchError, MultipleMatchError,
                ValidationFunctionError, MapMetaValidationError) as e:
            if not silence_errors:
                raise
            warnings.warn(
                f"One of the data, header pairs failed to validate with: {e}",
                SunpyUserWarning)

    if not len(new_maps):
        raise RuntimeError('No maps loaded')

    # If the list is meant to be a sequence, instantiate a map sequence
    if sequence:
        return MapSequence(new_maps, **kwargs)

    # If the list is meant to be a composite map, instantiate one
    if composite:
        return CompositeMap(new_maps, **kwargs)

    if len(new_maps) == 1:
        return new_maps[0]

    return new_maps
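# Hedged usage sketch (not from the source): the composite/sequence keywords
# documented above change what the Map factory returns. The filenames and the
# glob below are illustrative placeholders.
import sunpy.map

single_map = sunpy.map.Map('aia_171.fits')            # one pair -> a GenericMap subclass
map_seq = sunpy.map.Map('aia_*.fits', sequence=True)  # many pairs -> MapSequence
comp_map = sunpy.map.Map('aia_171.fits', 'hmi_mag.fits', composite=True)  # -> CompositeMap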
def _parse_netcdf(filepath):
    """
    Parses the netCDF GOES files to return the data, header and associated units.

    Parameters
    ----------
    filepath : `str`
        The path of the file to parse
    """
    with h5netcdf.File(filepath, mode="r", **XRSTimeSeries._netcdf_read_kw) as h5nc:
        header = MetaDict(OrderedDict(h5nc.attrs))
        flux_name = h5nc.variables.get("a_flux") or h5nc.variables.get("xrsa_flux")
        if flux_name is None:
            raise ValueError(
                f"No flux data (either a_flux or xrsa_flux) found in file: {filepath}")
        flux_name_a = flux_name.name
        flux_name_b = flux_name_a.replace("a", "b")
        xrsa = np.array(h5nc[flux_name_a])
        xrsb = np.array(h5nc[flux_name_b])
        xrsa_quality = np.array(h5nc[flux_name_a.replace("flux", "flags")])
        xrsb_quality = np.array(h5nc[flux_name_b.replace("flux", "flags")])
        start_time_str = h5nc["time"].attrs["units"]
        # h5netcdf < 0.14 returns bytes instead of a str
        if isinstance(start_time_str, bytes):
            start_time_str = start_time_str.decode("utf-8")
        start_time_str = start_time_str.lstrip("seconds since").rstrip("UTC").strip()
        times = Time(parse_time(start_time_str).unix + h5nc["time"], format="unix")
    try:
        times = times.datetime
    except ValueError:
        # We do not make the assumption that the leap second occurs at the end of the file.
        # Therefore, we need to find it:
        # To do so, we convert the times to isot strings, use numpy to find the leap
        # second string, then use that to work out the index of the leap timestamp.
        idx = np.argwhere(np.char.find(times.isot, ":60.") != -1)
        # We only handle the case where there is exactly 1 leap second in the file.
        # I don't think there would ever be a case where there is more than 1.
        if len(idx) != 1:
            raise ValueError(
                f"More than one leap second was found in: {Path(filepath).name}")
        warn_user(
            f"There is one leap second timestamp present in: {Path(filepath).name}. "
            "This timestamp has been rounded to `:59.999` to allow its conversion into a Python datetime. "
            f"The leap second timestamp was: {times.isot[idx]}")
        times[idx] = Time(times[idx].isot.tolist()[0][0][:17] + "59.999").unix
        times = times.datetime
    data = DataFrame(
        {
            "xrsa": xrsa,
            "xrsb": xrsb,
            "xrsa_quality": xrsa_quality,
            "xrsb_quality": xrsb_quality
        },
        index=times)
    data = data.replace(-9999, np.nan)
    units = OrderedDict([
        ("xrsa", u.W / u.m**2),
        ("xrsb", u.W / u.m**2),
        ("xrsa_quality", int),
        ("xrsb_quality", int),
    ])
    return data, header, units
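# Hedged illustration (not from the source): the leap-second handling above
# searches the ISOT strings for a ":60." field. A minimal standalone check of
# that idea, on a fabricated timestamp list, looks like this.
import numpy as np

isot = np.array(["2016-12-31T23:59:59.000",
                 "2016-12-31T23:59:60.000",   # the leap second entry
                 "2017-01-01T00:00:00.000"])
idx = np.argwhere(np.char.find(isot, ":60.") != -1)
print(idx)  # -> [[1]], the position that would be rounded down to :59.999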