def test_upcasing():
    meta = MetaDict({'wibble': 1, 'WOBBLE': 2})
    # __getitem__
    assert meta['wibble'] == meta['WIBBLE']
    # get
    assert meta.get('wibble') == meta.get('WIBBLE')
    # has_key
    assert ('wibble' in meta) == ('WIBBLE' in meta)
    # copy
    meta2 = meta.copy()
    assert meta2 == meta
    # pop
    assert meta.pop('wibble') == meta2.pop('WIBBLE')
    # update
    meta.update({'spam': 'eggs'})
    meta2.update({'SPAM': 'eggs'})
    assert meta == meta2
    # setdefault
    meta.setdefault('dave', 3)
    meta2.setdefault('DAVE', 3)
    assert meta.get('DAVE') == meta2.get('dave')
    # __setitem__
    meta['wibble'] = 10
    assert meta['wibble'] == 10
    meta['WIBBLE'] = 20
    assert meta['wibble'] == 20
    # __contains__
    assert 'wibble' in meta
    assert 'WIBBLE' in meta
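
# A minimal usage sketch of the behaviour the test above exercises (assumption:
# MetaDict is importable from sunpy.util.metadata). Keys are matched
# case-insensitively, so upper- and lower-case spellings refer to the same entry.
from sunpy.util.metadata import MetaDict

meta = MetaDict({'wibble': 1})
meta['WIBBLE'] = 2           # same entry, different casing
assert meta['wibble'] == 2   # lookups ignore case
assert 'WIBBLE' in meta      # membership tests ignore case too
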
def _sanitise_args(self, args):
    """
    Sanitise a list of args so that a single argument corresponds to either:

    - (data, header, units) tuple.
    - path-like `pathlib.Path` (e.g. a filename, directory, glob etc.).
    - `urllib.request.Request`.
    - `GenericTimeSeries`.
    """
    # Account for nested lists of items. Simply outputs a single list of
    # items, nested lists are expanded to element level.
    args = expand_list(args)

    # Sanitise the input so that each 'type' of input corresponds to a
    # different class, so single dispatch can be used later.
    i = 0
    while i < len(args):
        arg = args[i]
        if isinstance(arg, (np.ndarray, Table, pd.DataFrame)):
            # Extract data and metadata
            # The next item is data
            data = args[i]
            meta = MetaDict()
            units = OrderedDict()
            if isinstance(data, Table):
                # We have an Astropy Table:
                data, new_meta, new_units = self._from_table(data)
                units.update(new_units)
                meta.update(new_meta)
            elif isinstance(data, np.ndarray):
                # We have a numpy ndarray. We assume the first column is a datetime index.
                data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

            # The next two could be metadata or units
            for _ in range(2):
                j = i + 1
                if j < len(args):
                    arg = args[j]
                    if self._is_units(arg):
                        units.update(arg)
                        args.pop(j)
                    elif self._is_metadata(arg):
                        meta.update(self._parse_meta(arg))
                        args.pop(j)
            args[i] = (data, meta, units)
        elif isinstance(arg, str) and is_url(arg):
            args[i] = Request(arg)
        elif possibly_a_path(arg):
            args[i] = pathlib.Path(arg)
        i += 1
    return args
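
# A standalone sketch (not the factory's actual code) of the string-handling
# rule applied above: strings that parse as URLs become urllib Request objects,
# other strings become pathlib.Path. `looks_like_url` is a stand-in for
# sunpy's is_url helper; the example paths are made up.
import pathlib
from urllib.parse import urlparse
from urllib.request import Request


def looks_like_url(arg):
    parsed = urlparse(arg)
    return parsed.scheme in ('http', 'https', 'ftp')


def sanitise_one(arg):
    if isinstance(arg, str) and looks_like_url(arg):
        return Request(arg)
    if isinstance(arg, str):
        return pathlib.Path(arg)
    return arg  # data/meta/units arguments are handled separately


print(sanitise_one('https://example.com/goes.fits'))  # -> urllib Request
print(sanitise_one('~/data/goes.fits'))               # -> pathlib.Path
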
def _parse_meta(self, meta):
    """
    Parse different metadata objects into a MetaDict.
    """
    if isinstance(meta, astropy.io.fits.header.Header):
        meta = MetaDict(sunpy.io.header.FileHeader(meta))
    if isinstance(meta, sunpy.timeseries.TimeSeriesMetaData):
        new_meta = MetaDict()
        for m in meta.metas:
            new_meta.update(m)
        meta = new_meta
    return meta
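
# Illustrative sketch (keyword values are made up): an astropy.io.fits Header
# is one of the metadata objects the branch above accepts; the real code runs
# it through sunpy.io.header.FileHeader so that multi-line commentary cards
# survive the conversion into a MetaDict.
from astropy.io import fits

hdr = fits.Header()
hdr['TELESCOP'] = 'GOES'
hdr['INSTRUME'] = 'XRS'
print(list(hdr.items()))   # [('TELESCOP', 'GOES'), ('INSTRUME', 'XRS')]
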
def get(self, keys, time=None, colname=None, **kwargs):
    """
    Return a TimeSeriesMetaData object of all entries matching the time and
    colname filters, with the dictionaries containing only the key/value
    pairs whose key matches one of the given input keys.

    Parameters
    ----------
    keys : `str` or `list` of `str`
        The key(s) to be searched for in the dictionary.
    time : `str` or `astropy.time.Time`, optional
        The string (parsed using `~sunpy.time.parse_time`) or
        `~astropy.time.Time` that you need metadata for.
    colname : `str`, optional
        A string that can be used to narrow results to specific columns.
    itemised : `bool`, optional
        Option to allow the return of the time ranges and column names
        (as a list) that match each given value.

    Returns
    -------
    metadata : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        A TimeSeriesMetaData that contains all matching metadata entries but
        with only the requested key/value pairs in the MetaDict objects.
    """
    # Make a list of keys if only one is given
    if isinstance(keys, str):
        keys = [keys]

    # Find all metadata entries for the given time/colname filters
    full_metadata = self.find(time=time, colname=colname)
    metadata = []

    # Append to metadata only key:value pairs with requested keys
    for entry in full_metadata.metadata:
        metadict = MetaDict()
        for curkey, value in entry[2].items():
            for key in keys:
                if curkey.lower() == key.lower():
                    metadict.update({key: value})
        metadata.append((entry[0], entry[1], metadict))

    # Return a TimeSeriesMetaData object
    return TimeSeriesMetaData(meta=metadata)
def _parse_hdus(cls, hdulist):
    header = MetaDict(OrderedDict(hdulist[0].header))
    # Adding telescope to MetaData
    header.update({'TELESCOP': hdulist[1].header['TELESCOP'].split()[0]})
    start_time = parse_time(hdulist[1].header['T_OBS'])
    times = start_time + TimeDelta(hdulist[1].data['SOD'] * u.second)

    colnames = ['QD', 'CH_18', 'CH_26', 'CH_30', 'CH_36']
    all_data = [hdulist[1].data[x] for x in colnames]
    data = DataFrame(np.array(all_data).T,
                     index=times.isot.astype('datetime64'),
                     columns=colnames)
    data.sort_index(inplace=True)

    units = OrderedDict([('QD', u.W / u.m**2),
                         ('CH_18', u.W / u.m**2),
                         ('CH_26', u.W / u.m**2),
                         ('CH_30', u.W / u.m**2),
                         ('CH_36', u.W / u.m**2)])
    return data, header, units
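
# Hypothetical usage sketch (the file name is made up, and the class owning
# _parse_hdus is assumed to be the EVE/ESP timeseries source): the parser takes
# an already-opened HDUList and hands back the (data, header, units) triple the
# TimeSeries factory expects.
from astropy.io import fits
from sunpy.timeseries.sources.eve import ESPTimeSeries  # assumed import path

with fits.open('esp_L1_example.fits') as hdulist:        # hypothetical file
    data, header, units = ESPTimeSeries._parse_hdus(hdulist)

print(data.columns.tolist())   # ['QD', 'CH_18', 'CH_26', 'CH_30', 'CH_36']
print(units['QD'])             # W / m2
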
def get(self, keys, time=None, colname=None):
    """
    Return a `~sunpy.timeseries.metadata.TimeSeriesMetaData` with all entries
    matching the filters which also contain the given input key.

    Parameters
    ----------
    keys : `str` or `list` of `str`
        The key(s) to be searched for in the dictionary.
    time : {parse_time_types}, optional
        A `~sunpy.time.parse_time` parsable string that you need metadata for.
        Defaults to `None`.
    colname : `str`, optional
        A string that can be used to narrow results to specific columns.

    Returns
    -------
    metadata : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        A TimeSeriesMetaData that contains all matching metadata entries but
        with only the requested key/value pairs in the MetaDict objects.
    """
    # Make a list of keys if only one is given
    if isinstance(keys, str):
        keys = [keys]

    # Find all metadata entries for the given time/colname filters
    full_metadata = self.find(time=time, colname=colname)
    metadata = []

    # Append to metadata only key:value pairs with requested keys
    for entry in full_metadata.metadata:
        metadict = MetaDict()
        for curkey, value in entry[2].items():
            for key in keys:
                if curkey.lower() == key.lower():
                    metadict.update({key: value})
        metadata.append((entry[0], entry[1], metadict))

    # Return a TimeSeriesMetaData object
    return TimeSeriesMetaData(meta=metadata)
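
# Hypothetical usage sketch for the get() method above (metadata contents are
# made up; TimeSeriesMetaData, TimeRange and MetaDict are assumed to be
# importable as in sunpy). A single entry is filtered down to one key,
# matched case-insensitively.
from sunpy.time import TimeRange
from sunpy.timeseries import TimeSeriesMetaData
from sunpy.util.metadata import MetaDict

tr = TimeRange('2012-06-01 00:00', '2012-06-02 00:00')
md = TimeSeriesMetaData(meta=(tr, ['xrsa', 'xrsb'],
                              MetaDict({'TELESCOP': 'GOES', 'INSTRUME': 'X-ray Detector'})))

filtered = md.get('telescop', colname='xrsa')
print(filtered.metadata[0][2])   # MetaDict containing only the TELESCOP entry
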
def _parse_args(self, *args, **kwargs):
    """
    Parses an `args` list for data-header pairs. `args` can contain any
    mixture of the following entries:

    * tuples of (data, header, unit) (1)
    * data, header not in a tuple (1)
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    (1) header/unit are optional and in either order, but data should be the
    first entry in each group.

    Examples
    --------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')
    """
    data_header_unit_tuples = list()
    data_header_pairs = list()
    already_timeseries = list()
    filepaths = list()

    # Account for nested lists of items. Simply outputs a single list of
    # items, nested lists are expanded to element level.
    args = expand_list(args)

    # For each of the arguments, handle each of the cases
    i = 0
    while i < len(args):
        arg = args[i]

        # Data-header pair in a tuple
        if isinstance(arg, (np.ndarray, Table, pd.DataFrame)):
            # Assume a Pandas DataFrame is given
            data = arg
            units = OrderedDict()
            meta = MetaDict()

            # Convert the data argument into a Pandas DataFrame if needed.
            if isinstance(data, Table):
                # We have an Astropy Table:
                data, meta, units = self._from_table(data)
            elif isinstance(data, np.ndarray):
                # We have a numpy ndarray. We assume the first column is a datetime index.
                data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

            # If there are 1 or 2 more arguments:
            for _ in range(2):
                if len(args) > i + 1:
                    # If the next argument isn't data but is metadata or units:
                    if not isinstance(args[i + 1], (np.ndarray, Table, pd.DataFrame)):
                        if self._validate_units(args[i + 1]):
                            units.update(args[i + 1])
                            i += 1  # an extra increment to account for the units
                        elif self._validate_meta(args[i + 1]):
                            # If we have an astropy.io FITS header then convert
                            # it to preserve multi-line comments.
                            if isinstance(args[i + 1], astropy.io.fits.header.Header):
                                args[i + 1] = MetaDict(sunpy.io.header.FileHeader(args[i + 1]))
                            meta.update(args[i + 1])
                            i += 1  # an extra increment to account for the meta

            # Add a 3-tuple for this TimeSeries.
            data_header_unit_tuples.append((data, meta, units))

        # Filepath
        elif isinstance(arg, str) and os.path.isfile(os.path.expanduser(arg)):
            path = os.path.expanduser(arg)
            result = self._read_file(path, **kwargs)
            data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

        # Directory
        elif isinstance(arg, str) and os.path.isdir(os.path.expanduser(arg)):
            path = os.path.expanduser(arg)
            files = [os.path.join(path, elem) for elem in os.listdir(path)]
            for afile in files:
                # Returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source.
                result = self._read_file(afile, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

        # Glob
        elif isinstance(arg, str) and '*' in arg:
            files = glob.glob(os.path.expanduser(arg))
            for afile in files:
                # Returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source.
                result = self._read_file(afile, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

        # Already a TimeSeries
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)

        # A URL
        elif isinstance(arg, str) and _is_url(arg):
            url = arg
            path = download_file(url, get_and_create_download_dir())
            result = self._read_file(path, **kwargs)
            data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

        else:
            raise NoMatchError("File not found or invalid input")
        i += 1

    # TODO:
    # In the end, if there are already TimeSeries they should be put in the
    # same order as the input; currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
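
# `_apply_result` is used above but not defined in these snippets. Based on
# the expanded if/else branches in the older version of _parse_args further
# down, a plausible sketch of the helper (name and signature assumed) is:
def _apply_result(data_header_pairs, filepaths, result):
    read, content = result
    if read:
        # The file was understood by one of the generic readers: keep the
        # (data, header) pair(s) it produced.
        data_header_pairs.append(content)
    else:
        # Not readable here; keep the path so an instrument source class
        # can try to read it later.
        filepaths.append(content)
    return data_header_pairs, filepaths
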
def _parse_args(self, *args, **kwargs):
    """
    Parses an args list for data-header pairs. args can contain any mixture
    of the following entries:

    * tuples of (data, header, unit) (1)
    * data, header not in a tuple (1)
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    (1) Note that header/unit are optional and in either order, but data must
    be the first entry in each group.

    Examples
    --------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')
    """
    data_header_unit_tuples = list()
    data_header_pairs = list()
    already_timeseries = list()
    filepaths = list()

    # Take source kwarg if defined
    source = kwargs.get('source', None)

    # Account for nested lists of items. Simply outputs a single list of
    # items, nested lists are expanded to element level.
    args = expand_list(args)

    # For each of the arguments, handle each of the cases
    i = 0
    while i < len(args):
        arg = args[i]

        # Data-header pair in a tuple
        if isinstance(arg, (np.ndarray, Table, pd.DataFrame)):
            # Assume a Pandas DataFrame is given
            data = arg
            units = OrderedDict()
            meta = MetaDict()

            # Convert the data argument into a Pandas DataFrame if needed.
            if isinstance(data, Table):
                # We have an Astropy Table:
                data, meta, units = self._from_table(data)
            elif isinstance(data, np.ndarray):
                # We have a numpy ndarray. We assume the first column is a datetime index.
                data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

            # If there are 1 or 2 more arguments:
            for _ in range(2):
                if len(args) > i + 1:
                    # If the next argument isn't data but is metadata or units:
                    if not isinstance(args[i + 1], (np.ndarray, Table, pd.DataFrame)):
                        if self._validate_units(args[i + 1]):
                            units.update(args[i + 1])
                            i += 1  # an extra increment to account for the units
                        elif self._validate_meta(args[i + 1]):
                            # If we have an astropy.io FITS header then convert
                            # it to preserve multi-line comments.
                            if isinstance(args[i + 1], astropy.io.fits.header.Header):
                                args[i + 1] = MetaDict(sunpy.io.header.FileHeader(args[i + 1]))
                            meta.update(args[i + 1])
                            i += 1  # an extra increment to account for the meta

            # Add a 3-tuple for this TimeSeries.
            data_header_unit_tuples.append((data, meta, units))

        # Filepath
        elif isinstance(arg, six.string_types) and os.path.isfile(os.path.expanduser(arg)):
            path = os.path.expanduser(arg)
            read, result = self._read_file(path, **kwargs)
            if read:
                data_header_pairs.append(result)
            else:
                filepaths.append(result)

        # Directory
        elif isinstance(arg, six.string_types) and os.path.isdir(os.path.expanduser(arg)):
            path = os.path.expanduser(arg)
            files = [os.path.join(path, elem) for elem in os.listdir(path)]
            for afile in files:
                # Returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source.
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Glob
        elif isinstance(arg, six.string_types) and '*' in arg:
            files = glob.glob(os.path.expanduser(arg))
            for afile in files:
                # Returns a boolean telling us if it was read and either a
                # tuple or the original filepath for reading by a source.
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Already a TimeSeries
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)

        # A URL
        elif isinstance(arg, six.string_types) and _is_url(arg):
            default_dir = sunpy.config.get("downloads", "download_dir")
            url = arg
            path = download_file(url, default_dir)
            pairs = self._read_file(path, **kwargs)
            filepaths.append(pairs[1])

        else:
            raise NoMatchError("File not found or invalid input")
        i += 1

    # TODO:
    # In the end, if there are already TimeSeries they should be put in the
    # same order as the input; currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
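
# Hypothetical usage sketch of the factory these _parse_args variants support
# (file names are made up; the public entry point is assumed to be
# sunpy.timeseries.TimeSeries): any mixture of the accepted argument forms can
# be passed in a single call.
import sunpy.timeseries

ts_single = sunpy.timeseries.TimeSeries('goes_xrs_20120601.fits')               # one file
ts_joined = sunpy.timeseries.TimeSeries('eve_esp_L1_*.fits', concatenate=True)  # glob, concatenated
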