def test_variable_merge_interleaved(self):
    """Merging two time-offset 1 Hz variables should interleave values."""
    start = datetime.datetime(2000, 1, 1)
    index1 = pd.date_range(start=start, periods=10, freq='2S')
    index2 = pd.date_range(start=start + datetime.timedelta(seconds=1),
                           periods=10, freq='2S')

    v1 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES}, index=index1,
                         frequency=1)
    v2 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES}, index=index2,
                         frequency=1)
    v1.merge(v2)

    self.assertEqual(v1.t0, index1[0])
    self.assertEqual(v1.t1, index2[-1])
    self.assertEqual(len(v1), 2 * len(TEST_VAR_VALUES))

    # Each test value should appear twice in a row: once from each
    # variable, interleaved in time.
    expected = [val for item in TEST_VAR_VALUES for val in (item, item)]
    for actual, wanted in zip(v1.array, expected):
        self.assertEqual(actual, wanted)
def process(self):
    """
    Module entry hook. Builds indicated and true airspeed outputs from
    the RVSM pressures, flagging both wherever the computed mach number
    is out of range.
    """
    # Gather all required inputs into the module dataframe.
    self.get_dataframe()

    # Each calculation stores its result in instance state (self.d).
    self.calc_mach()
    self.calc_ias()
    self.calc_tas()

    ias = DecadesVariable(self.d['IAS_RVSM'], flag=DecadesBitmaskFlag)
    tas = DecadesVariable(self.d['TAS_RVSM'], flag=DecadesBitmaskFlag)

    # Both speeds inherit the same mach-out-of-range mask.
    for speed_var in (ias, tas):
        speed_var.flag.add_mask(
            self.d['MACHNO_FLAG'], 'mach out of range',
            ('Either static or dynamic pressure out of acceptable limits '
             'during calculation of mach number.'))
        self.add_output(speed_var)
def read(self):
    """
    Read a single FGGA NASA-Ames file, adding CO2/CH4 data and flag
    variables as unwritten inputs, and the NCOM block as a global comment.
    """
    if len(self.files) > 1:
        raise ValueError('Only 1 FGGA file currently accepted')

    reader = NasaAmes1001Reader(self.files[0].filepath)
    reader.read()

    self.metadata = reader.meta
    self.data = reader.data
    self._extract_units()

    short_names = [
        'FGGA_CO2', 'FGGA_CO2_FLAG', 'FGGA_CH4', 'FGGA_CH4_FLAG'
    ]

    dataset = self.files[0].dataset

    # Data variables sit at even offsets; each is immediately followed
    # by its flag variable.
    for data_idx in (0, 2):
        flag_idx = data_idx + 1

        measurement = DecadesVariable(
            {short_names[data_idx]:
                self.data[self.metadata['vname'][data_idx]]},
            long_name=self.metadata['long_names'][data_idx],
            units=self.metadata['units'][data_idx],
            frequency=1,
            flag=None,
            write=False)

        flag_var = DecadesVariable(
            {short_names[flag_idx]:
                self.data[self.metadata['vname'][flag_idx]]},
            units=None,
            frequency=1,
            flag=None,
            write=False)

        dataset.add_input(measurement)
        dataset.add_input(flag_var)

    dataset.add_global('comment', '\n'.join(self.metadata['ncom']))
def test_dataset_time_bounds_inputs(self):
    """time_bounds() should span the earliest and latest input times."""
    early = DecadesVariable({'a': TEST_VAR_VALUES}, index=self.test_index_1)

    shifted_index = self.test_index_1 + datetime.timedelta(minutes=1)
    late = DecadesVariable({'b': TEST_VAR_VALUES}, index=shifted_index)

    self.d.add_input(early)
    self.d.add_input(late)

    bounds = self.d.time_bounds()
    self.assertEqual(bounds[0], self.test_index_1[0])
    self.assertEqual(bounds[1], shifted_index[-1])
def test_list_variables(self):
    """Both inputs and outputs should appear in dataset.variables."""
    input_var = DecadesVariable({'a': TEST_VAR_VALUES},
                                index=self.test_index_1, frequency=1)
    output_var = DecadesVariable({'b': TEST_VAR_VALUES},
                                 index=self.test_index_1, frequency=1)

    self.d.add_input(input_var)
    self.d.add_output(output_var)

    for var in (input_var, output_var):
        self.assertIn(var.name, self.d.variables)
def test_variable_isnumeric(self):
    """isnumeric should be True for numeric data and False for strings."""
    self.assertTrue(self.v.isnumeric)

    text_var = DecadesVariable(
        {'stringvar': ['a'] * len(TEST_VAR_VALUES)},
        index=self.test_index_1,
        frequency=1)
    self.assertFalse(text_var.isnumeric)
def read(self):
    """
    Read SEA probe data files, adding a raw DecadesVariable for each
    named column in each returned dataframe, and recording any file
    metadata as SEA_* dataset constants.
    """
    for _file in self.files:
        logger.info(f'Reading {_file}')
        dfs, metadata = to_dataframe(_file.filepath)

        for group in dfs.keys():
            df = dfs[group]

            # Sample frequency is a property of the dataframe index, not
            # of each column, so derive it once per dataframe rather than
            # once per column. If it cannot be inferred, skip all of this
            # dataframe's variables (equivalent to the previous behaviour
            # of breaking out on the first column).
            try:
                _freq = int(np.timedelta64(1, 's') / df.index.freq.delta)
            except Exception:
                continue

            for i, name in enumerate(parser_f[group]['names']):
                _varname = 'SEAPROBE_{}_{}'.format(
                    group, name.replace('el', ''))

                _var = DecadesVariable(
                    {_varname: df[name].values},
                    index=df.index,
                    long_name=parser_f[group]['long_names'][i],
                    frequency=_freq,
                    units=parser_f[group]['units'][i],
                    write=False,
                    flag=None)

                _file.dataset.add_input(_var)

        for key, value in metadata.items():
            _file.dataset.constants['SEA_{}'.format(key)] = value
def test_takeoff_land_time(self):
    """Takeoff/landing times derive from weight-on-wheels flag edges."""
    wow_flag = [1, 1, 1, 1, 0, 0, 0, 1, 1, 1]
    prtaft = DecadesVariable({'PRTAFT_wow_flag': wow_flag},
                             index=self.test_index_1)
    self.d.add_input(prtaft)

    # Takeoff at the 1 -> 0 transition, landing at the 0 -> 1 transition.
    self.assertEqual(self.d.takeoff_time, self.test_index_1[4])
    self.assertEqual(self.d.landing_time, self.test_index_1[7])
def test_clear_outputs(self):
    """clear_outputs should leave the dataset with no output variables."""
    second = DecadesVariable({'v2': TEST_VAR_VALUES},
                             index=self.test_index_1, frequency=1)

    self.d.add_output(self.v)
    self.d.add_output(second)
    self.d.clear_outputs()

    self.assertEqual(self.d.outputs, [])
def test_outputs_list(self):
    """Variables added as outputs should be present in dataset.outputs."""
    second = DecadesVariable({'v2': TEST_VAR_VALUES},
                             index=self.test_index_1, frequency=1)

    self.d.add_output(self.v)
    self.d.add_output(second)

    for var in (self.v, second):
        self.assertIn(var, self.d.outputs)
def read(self):
    """
    Read CCN data files. The date is scraped from a header comment line,
    column names from the header row, and the data itself with pandas.
    Each column is added as an unwritten CCN_* input variable.
    """
    for _file in self.files:
        date = None
        # Raw string: \d in a non-raw literal is an invalid escape
        # sequence (SyntaxWarning from Python 3.12 onwards).
        rex = re.compile(r'Date.*(\d{2}/\d{2}/\d{2}).*')
        cnt = 0

        with open(_file.filepath, 'r') as f:
            while True:
                # Scan through the file until we have the date and the
                # header line
                cnt += 1
                line = f.readline()

                # Guard against scanning past EOF forever when the file
                # doesn't contain the expected wide header line.
                if not line:
                    raise ValueError(
                        'No header found in {}'.format(_file.filepath))

                header = [i.strip() for i in line.split(',')]

                if date is None:
                    _date = rex.findall(line)
                    if _date:
                        date = datetime.datetime.strptime(
                            _date[0], '%m/%d/%y').date()

                if len(header) > 50:
                    break

        # The header isn't quite right - We first want to remove the first
        # element (Time, as this will be the index) and add 'A' and 'B'
        # versions of the final element
        header = header[1:]
        header.append(header[-1])
        header[-2] += ' A'
        header[-1] += ' B'

        # Read the data using pandas, ensuring uniqueness of the index
        data = pd.read_csv(_file.filepath, header=0, names=header,
                           skiprows=cnt - 1, index_col=0,
                           parse_dates=True)
        data = data.groupby(data.index).last()

        # It's only the time that's reported. Using parse_dates will use
        # today as the date, so we need to correct for this using the date
        # extracted from the header of the data file
        delta = datetime.date.today() - date
        data.index -= delta

        for col in data.columns:
            name = col.replace(' ', '_')
            if not name.startswith('CCN_'):
                name = f'CCN_{name}'

            _file.dataset.add_input(
                DecadesVariable({name: data[col]}, units=None,
                                frequency=1, flag=None, write=False))
def setUp(self):
    """Create a dataset, 1 Hz and 32 Hz test indexes, and a test variable."""
    start = datetime.datetime(2000, 1, 1)

    # 10 seconds of data at 1 Hz...
    self.test_index_1 = pd.date_range(start=start, periods=10, freq='S')

    # ...and the same period at 32 Hz (31250000 ns sample spacing).
    self.test_index_32 = pd.date_range(start=start, periods=320,
                                       freq='31250000N')

    self.d = DecadesDataset(start)
    self.v = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                             index=self.test_index_1, frequency=1)
def test_variable_merge_noncontiguous(self):
    """Merging variables separated by a gap should pad the gap period."""
    start = datetime.datetime(2000, 1, 1)
    index1 = pd.date_range(start=start, periods=10, freq='S')
    index2 = pd.date_range(start=start + datetime.timedelta(seconds=15),
                           periods=10, freq='S')

    v1 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES}, index=index1,
                         frequency=1)
    v2 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES}, index=index2,
                         frequency=1)
    v1.merge(v2)

    self.assertEqual(v1.t0, index1[0])
    self.assertEqual(v1.t1, index2[-1])

    # 10 + 10 samples, plus the 5 second gap between the two ranges.
    self.assertEqual(len(v1), 2 * len(TEST_VAR_VALUES) + 5)
def test_instance(cls, dataset=None):
    """
    Return a test instance of a postprocessing module, initialized with a
    DecadesDataset containing the modules test data.

    Args:
        dataset: an optional DecadesDataset to populate. If not given, a
            new dataset dated today is created.

    Returns:
        An instance of cls, in test mode, backed by a dataset populated
        with the module's declared test constants and data.
    """
    now = datetime.datetime.now().replace(microsecond=0)
    d = DecadesDataset(now.date()) if dataset is None else dataset

    # The module's test spec may be a dict or a callable returning one.
    if callable(cls.test):  # pylint: disable=not-callable
        _test = cls.test()
    else:
        _test = cls.test

    for key, val in _test.items():
        # Each entry is ('const', value) or ('data', values, frequency).
        # Use the unpacked tag/payload rather than re-indexing val (the
        # previous unpacking here was dead code).
        _type, *_rest = val

        if _type == 'const':
            d.constants[key] = _rest[0]

        elif _type == 'data':
            _values, _freq = _rest
            _dt = datetime.timedelta(seconds=1 / _freq)
            freq = '{}N'.format((1 / _freq) * 1e9)

            start_time = datetime.datetime(*d.date.timetuple()[:3])
            end_time = (start_time
                        + datetime.timedelta(seconds=len(_values))
                        - _dt)

            # Place the 1 Hz test values on a 1 Hz index, then reindex
            # onto the variable's full-rate index and interpolate.
            hz1_index = pd.date_range(start=start_time,
                                      periods=len(_values), freq='S')
            full_index = pd.date_range(start=start_time, end=end_time,
                                       freq=freq)

            data = pd.Series(
                _values, hz1_index
            ).reindex(full_index).interpolate()

            var = DecadesVariable(data, name=key, frequency=_freq)
            d.add_input(var)

    return cls(d, test_mode=True)
def read(self):
    """
    Read CSV data files, inferring the sample frequency from the index
    spacing, and add every column as a raw, unwritten input variable.
    """
    for _file in self.files:
        df = pd.read_csv(_file.filepath, index_col=[0], parse_dates=[0])

        # Infer the sample frequency from the first two timestamps.
        # NOTE: leftover debug print() calls removed here.
        _freq = int(1 / (df.index[1] - df.index[0]).total_seconds())

        for variable_name in df.columns:
            variable = DecadesVariable(
                df[variable_name],
                index=df.index,
                name=variable_name,
                long_name=variable_name,
                units='RAW',
                frequency=_freq,
                write=False,
                flag=None)

            _file.dataset.add_input(variable)
def read(self):
    """
    Read core-style netCDF files, creating an unwritten input variable
    for every non-flag, non-time variable in each file.
    """
    for _file in self.files:
        logger.info(f'Reading {_file}...')

        with netCDF4.Dataset(_file.filepath) as nc:
            time_index = pd.DatetimeIndex(
                netCDF4.num2date(
                    nc['Time'][:],
                    units=nc['Time'].units,
                    only_use_cftime_datetimes=False,
                    only_use_python_datetimes=True))

            for var in nc.variables:
                # Time becomes the index; flag variables are handled via
                # self.flag rather than read independently.
                if var.endswith('FLAG') or var == 'Time':
                    continue

                # Get the frequency of the variable from the name of the
                # second dimension. Ugly as sin!
                try:
                    sample_freq = int(
                        nc[var].dimensions[1].replace('sps', ''))
                except IndexError:
                    sample_freq = 1

                decades_var = DecadesVariable(
                    {var: nc[var][:].ravel()},
                    index=self._time_at(time_index,
                                        self._var_freq(nc[var])),
                    name=var,
                    write=False,
                    flag=self._flag_class(var, nc),
                    frequency=sample_freq,
                )

                self.flag(decades_var, nc)

                for attr in nc[var].ncattrs():
                    # Horrible frequency special case, to cope with v004
                    # core data files, where frequency corresponds to some
                    # nominal measurement frequency, rather than sample
                    # freq in the file
                    if attr == 'frequency':
                        setattr(decades_var, attr, sample_freq)
                        continue
                    setattr(decades_var, attr, getattr(nc[var], attr))

                _file.dataset.add_input(decades_var)
def get_flagmod_doc(module):
    """
    Returns documentation for a given flagging module, as restructured
    text.

    Args:
        module: the flagging module to document, expected to be a
            subclass of ppodd.flags.FlaggingBase.

    Returns:
        A restructured text string containing the module documentation.
    """
    index = module.test_index
    flag = module.test_flag

    # Build a minimal dataset containing each variable the module flags,
    # then run the module against it in test mode.
    dataset = DecadesDataset()
    for var in module.flagged:
        dataset.add_output(
            DecadesVariable(pd.Series(flag, index=index, name=var),
                            flag=DecadesBitmaskFlag))

    mod = module(dataset)
    mod._flag(test=True)

    # Assemble the RST output from parts and join once at the end.
    title = module.__name__
    rule = '-' * len(title)
    parts = ['\n\n', rule, '\n', title, '\n', rule, '\n\n', mod.__doc__]

    for var, flag_infos in mod.flags.items():
        mod_var_txt = f'Flagged variable: `{var}`'
        parts.append('\n\n' + mod_var_txt + '\n')
        parts.append('-' * len(mod_var_txt) + '\n\n')
        for flag_info in flag_infos:
            parts.append(f'* ``{flag_info[0]}`` - {flag_info[1]}\n')

    return ''.join(parts)
def read(self):
    """
    Read raw CRIO data files. Each file is parsed against its CRIO
    definition as a numpy structured array; records whose identifier does
    not match the definition trigger a slower element-wise scan. Times
    are sanity-checked and lightly interpolated before each field is
    added to the dataset as a raw, unwritten variable.
    """
    for _file in sorted(self.files,
                        key=lambda x: os.path.basename(x.filepath)):
        self.dataset = _file.dataset
        # Per-file cache of time indexes, keyed by frequency; populated
        # as a side effect of self._get_index (presumably — confirm).
        self._index_dict = {}

        definition = self._get_definition(_file)
        if definition is None:
            warnings.warn(
                'No CRIO definition found for {}'.format(_file.filepath),
                RuntimeWarning)
            continue

        dtypes = definition.dtypes

        logger.info('Reading {}...'.format(_file))

        # Fast path: interpret the whole file as a structured array.
        _data = np.fromfile(_file.filepath, dtype=dtypes)

        # Verify every record's identifier matches the definition; any
        # mismatch or undecodable id means the fast read is unreliable.
        _read_fail = False
        for d in _data:
            try:
                data_id = d[0].decode('utf-8')
            except UnicodeDecodeError:
                _read_fail = True
                break

            data_id = data_id.replace('$', '')
            if data_id != definition.identifier:
                _read_fail = True
                break

        if _read_fail:
            # Fall back to a record-by-record scan of the file.
            del _data
            _data = self.scan(_file, definition)

        _time = _data[self.time_variable]

        # If there isn't any time info, then get out of here before we
        # raise an exception.
        if not len(_time):
            continue

        # Small amount of error tolerence. If there's a single dodgy
        # timestamp in between two otherwise OK timestamps, assume that
        # it's OK to interpolate across it
        _time = pd.Series(_time)
        _time.loc[(_time - _time.median()).abs() > C_BAD_TIME_DEV] = np.nan
        _time = _time.interpolate(limit=1).values

        # Keep only records with a valid (non-NaN) timestamp.
        _good_times = np.where(~np.isnan(_time))
        _time = _time[_good_times]

        if len(_time) < C_MIN_ARRAY_LEN:
            logger.info('Array shorter than minimum valid length.')
            continue

        for _name, _dtype in _data.dtype.fields.items():
            # Skip the identifier field and the time field itself.
            if _name[0] == '$':
                continue
            if _name == self.time_variable:
                continue

            # Pandas doesn't enjoy non-native endianess, so convert data
            # to system byteorder if required
            _var_dtype = _dtype[0].base
            if definition.get_field(_name).byte_order != sys.byteorder:
                _var = _data[_name].byteswap().newbyteorder()
                _var_dtype = _var_dtype.newbyteorder()
            else:
                _var = _data[_name]

            # Drop records with bad timestamps; 2-d fields carry one row
            # per record.
            if len(_var.shape) == 1:
                _var = _var[_good_times]
            else:
                _var = _var[_good_times, :]

            frequency, index = self._get_index(_var, _name, _time,
                                               definition)

            # Define the decades variable
            dtd = self._get_group_name(definition)
            variable_name = '{}_{}'.format(dtd, _name)

            # Truncate the data if it is longer than the index at this
            # frequency, warning that the two disagree.
            max_var_len = len(self._index_dict[frequency])
            if max_var_len != len(_var.ravel()):
                logger.warning('index & variable len differ')
                logger.warning(' -> ({})'.format(variable_name))
                _var = _var.ravel()[:max_var_len]

            variable = DecadesVariable(
                {variable_name: _var.ravel()},
                index=self._index_dict[frequency],
                name=variable_name,
                long_name=definition.get_field(_name).long_name,
                units='RAW',
                frequency=frequency,
                tolerance=self.tolerance,
                write=False,
                flag=None,
                # dtype=_var_dtype
            )

            _file.dataset.add_input(variable)
def _get_var_32(self):
    """Return a test variable with values tiled onto the 32 Hz index."""
    values = TEST_VAR_VALUES * 32
    return DecadesVariable({TEST_VAR_NAME: values},
                           index=self.test_index_32)