Example #1
    def test_variable_merge_interleaved(self):
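        # Two indices at 2-second spacing, offset from each other by one
        # second, so merging should interleave the samples of v1 and v2.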
        index1 = pd.date_range(start=datetime.datetime(2000, 1, 1),
                               periods=10,
                               freq='2S')

        index2 = pd.date_range(start=datetime.datetime(2000, 1, 1, 0, 0, 1),
                               periods=10,
                               freq='2S')

        v1 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                             index=index1,
                             frequency=1)

        v2 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                             index=index2,
                             frequency=1)

        v1.merge(v2)

        self.assertEqual(v1.t0, index1[0])
        self.assertEqual(v1.t1, index2[-1])
        self.assertEqual(len(v1), 2 * (len(TEST_VAR_VALUES)))

        expected_array = []
        for i in TEST_VAR_VALUES:
            expected_array.append(i)
            expected_array.append(i)

        for a, b in zip(v1.array, expected_array):
            self.assertEqual(a, b)
Example #2
    def process(self):
        """
        Module entry hook.
        """

        # Get all of the required inputs.
        self.get_dataframe()

        # Run required calculations in turn. These are stored in instance
        # state.
        self.calc_mach()
        self.calc_ias()
        self.calc_tas()

        # Create output variables for the indicated and true airspeeds.
        ias = DecadesVariable(self.d['IAS_RVSM'], flag=DecadesBitmaskFlag)
        tas = DecadesVariable(self.d['TAS_RVSM'], flag=DecadesBitmaskFlag)

        # Flag the data wherever the mach number is out of range.
        for _var in (ias, tas):
            _var.flag.add_mask(
                self.d['MACHNO_FLAG'], 'mach out of range',
                ('Either static or dynamic pressure out of acceptable limits '
                 'during calculation of mach number.'))
            self.add_output(_var)
Example #3
    def read(self):
        if len(self.files) > 1:
            raise ValueError('Only 1 FGGA file currently accepted')

        reader = NasaAmes1001Reader(self.files[0].filepath)
        reader.read()
        self.metadata = reader.meta
        self.data = reader.data
        self._extract_units()

        short_names = [
            'FGGA_CO2', 'FGGA_CO2_FLAG', 'FGGA_CH4', 'FGGA_CH4_FLAG'
        ]
        dataset = self.files[0].dataset

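        # Indices 0 and 2 of short_names hold the CO2 and CH4 data variables;
        # indices 1 and 3 hold the corresponding flag variables.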
        for i in (0, 2):
            var = DecadesVariable(
                {short_names[i]: self.data[self.metadata['vname'][i]]},
                long_name=self.metadata['long_names'][i],
                units=self.metadata['units'][i],
                frequency=1,
                flag=None,
                write=False)

            flag = DecadesVariable(
                {short_names[i + 1]: self.data[self.metadata['vname'][i + 1]]},
                units=None,
                frequency=1,
                flag=None,
                write=False)
            dataset.add_input(var)
            dataset.add_input(flag)

        dataset.add_global('comment', '\n'.join(self.metadata['ncom']))
Example #4
    def test_dataset_time_bounds_inputs(self):
        v1 = DecadesVariable({'a': TEST_VAR_VALUES}, index=self.test_index_1)
        index2 = self.test_index_1 + datetime.timedelta(minutes=1)
        v2 = DecadesVariable({'b': TEST_VAR_VALUES}, index=index2)
        self.d.add_input(v1)
        self.d.add_input(v2)
        bnds = self.d.time_bounds()
        self.assertEqual(bnds[0], self.test_index_1[0])
        self.assertEqual(bnds[1], index2[-1])
Example #5
    def test_list_variables(self):
        v1 = DecadesVariable({'a': TEST_VAR_VALUES},
                             index=self.test_index_1,
                             frequency=1)
        v2 = DecadesVariable({'b': TEST_VAR_VALUES},
                             index=self.test_index_1,
                             frequency=1)

        self.d.add_input(v1)
        self.d.add_output(v2)

        self.assertIn(v1.name, self.d.variables)
        self.assertIn(v2.name, self.d.variables)
Example #6
    def test_variable_isnumeric(self):
        self.assertTrue(self.v.isnumeric)

        v2 = DecadesVariable({'stringvar': ['a'] * len(TEST_VAR_VALUES)},
                             index=self.test_index_1,
                             frequency=1)
        self.assertFalse(v2.isnumeric)
Example #7
    def read(self):
        for _file in self.files:
            logger.info(f'Reading {_file}')
            dfs, metadata = to_dataframe(_file.filepath)

            for k in dfs.keys():
                df = dfs[k]
                for i, name in enumerate(parser_f[k]['names']):
                    try:
                        _freq = int(
                            np.timedelta64(1, 's') / dfs[k].index.freq.delta)
                    except Exception:
                        break

                    _data = df[name].values

                    _varname = 'SEAPROBE_{}_{}'.format(k,
                                                       name.replace('el', ''))

                    _var = DecadesVariable(
                        {_varname: _data},
                        index=df.index,
                        long_name=parser_f[k]['long_names'][i],
                        frequency=_freq,
                        units=parser_f[k]['units'][i],
                        write=False,
                        flag=None)

                    _file.dataset.add_input(_var)

        for key, value in metadata.items():
            _file.dataset.constants['SEA_{}'.format(key)] = value
Example #8
    def test_takeoff_land_time(self):
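        # PRTAFT_wow_flag is a weight-on-wheels flag: takeoff is the first
        # sample at which it drops to 0, landing the first sample at which it
        # returns to 1.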
        vals = [1, 1, 1, 1, 0, 0, 0, 1, 1, 1]
        prtaft = DecadesVariable({'PRTAFT_wow_flag': vals},
                                 index=self.test_index_1)
        self.d.add_input(prtaft)

        self.assertEqual(self.d.takeoff_time, self.test_index_1[4])
        self.assertEqual(self.d.landing_time, self.test_index_1[7])
Example #9
    def test_clear_outputs(self):
        v1 = self.v
        v2 = DecadesVariable({'v2': TEST_VAR_VALUES},
                             index=self.test_index_1,
                             frequency=1)
        self.d.add_output(v1)
        self.d.add_output(v2)
        self.d.clear_outputs()
        self.assertEqual(self.d.outputs, [])
Example #10
    def test_outputs_list(self):
        v1 = self.v
        v2 = DecadesVariable({'v2': TEST_VAR_VALUES},
                             index=self.test_index_1,
                             frequency=1)
        self.d.add_output(v1)
        self.d.add_output(v2)
        self.assertIn(v1, self.d.outputs)
        self.assertIn(v2, self.d.outputs)
Example #11
    def read(self):
        for _file in self.files:
            date = None
            rex = re.compile(r'Date.*(\d{2}/\d{2}/\d{2}).*')
            cnt = 0
            with open(_file.filepath, 'r') as f:
                while True:
                    # Scan through the file until we have the date and the
                    # header line
                    cnt += 1
                    line = f.readline()
                    header = [i.strip() for i in line.split(',')]
                    if date is None:
                        _date = rex.findall(line)
                        if _date:
                            date = datetime.datetime.strptime(
                                _date[0], '%m/%d/%y').date()
                    if len(header) > 50:
                        break

            # The header isn't quite right: we first remove the first element
            # (Time, as this will be the index) and then append 'A' and 'B'
            # versions of the final element.
            header = header[1:]
            header.append(header[-1])
            header[-2] += ' A'
            header[-1] += ' B'

            # Read the data using pandas, ensuring uniqueness of the index
            data = pd.read_csv(_file.filepath,
                               header=0,
                               names=header,
                               skiprows=cnt - 1,
                               index_col=0,
                               parse_dates=True)
            data = data.groupby(data.index).last()

            # It's only the time that's reported. Using parse_dates will use
            # today as the date, so we need to correct for this using the date
            # extracted from the header of the data file
            delta = datetime.date.today() - date
            data.index -= delta

            for col in data.columns:
                name = col.replace(' ', '_')
                if not name.startswith('CCN_'):
                    name = f'CCN_{name}'
                _file.dataset.add_input(
                    DecadesVariable({name: data[col]},
                                    units=None,
                                    frequency=1,
                                    flag=None,
                                    write=False))
Example #12
    def setUp(self):
        self.test_index_1 = pd.date_range(start=datetime.datetime(2000, 1, 1),
                                          periods=10,
                                          freq='S')

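        # 31,250,000 ns between samples is 1/32 s, i.e. a 32 Hz index covering
        # the same 10 seconds as test_index_1.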
        self.test_index_32 = pd.date_range(start=datetime.datetime(2000, 1, 1),
                                           periods=320,
                                           freq='31250000N')

        self.d = DecadesDataset(datetime.datetime(2000, 1, 1))
        self.v = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                                 index=self.test_index_1,
                                 frequency=1)
Example #13
    def test_variable_merge_noncontiguous(self):
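        # Two 1 Hz indices separated by a 5-second gap, so the merged variable
        # should be five samples longer than the two inputs combined.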
        index1 = pd.date_range(start=datetime.datetime(2000, 1, 1),
                               periods=10,
                               freq='S')

        index2 = pd.date_range(start=datetime.datetime(2000, 1, 1, 0, 0, 15),
                               periods=10,
                               freq='S')

        v1 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                             index=index1,
                             frequency=1)

        v2 = DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES},
                             index=index2,
                             frequency=1)

        v1.merge(v2)

        self.assertEqual(v1.t0, index1[0])
        self.assertEqual(v1.t1, index2[-1])
        self.assertEqual(len(v1), 2 * (len(TEST_VAR_VALUES)) + 5)
Example #14
    @classmethod
    def test_instance(cls, dataset=None):
        """
        Return a test instance of a postprocessing module, initialized with a
        DecadesDataset containing the module's test data.
        """
        now = datetime.datetime.now().replace(microsecond=0)

        if dataset is None:
            d = DecadesDataset(now.date())
        else:
            d = dataset

        if callable(cls.test):
            # pylint: disable=not-callable
            _test = cls.test()
        else:
            _test = cls.test

        for key, val in _test.items():
            _type, *_values = val

            if val[0] == 'const':
                d.constants[key] = val[1]

            elif val[0] == 'data':

                _values, _freq = val[1:]
                _dt = datetime.timedelta(seconds=1 / _freq)
                freq = '{}N'.format((1 / _freq) * 1e9)

                start_time = datetime.datetime(*d.date.timetuple()[:3])
                end_time = (start_time +
                            datetime.timedelta(seconds=len(_values)) - _dt)

                hz1_index = pd.date_range(start=start_time,
                                          periods=len(_values),
                                          freq='S')
                full_index = pd.date_range(start=start_time,
                                           end=end_time,
                                           freq=freq)

                data = pd.Series(_values,
                                 hz1_index).reindex(full_index).interpolate()

                var = DecadesVariable(data, name=key, frequency=_freq)
                d.add_input(var)

        return cls(d, test_mode=True)
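
A minimal usage sketch (SomeModule is a hypothetical stand-in for a postprocessing module class that defines test data): test_instance builds a throwaway DecadesDataset from the declared constants and inputs, so the module can be exercised in isolation.

    # Hypothetical usage: SomeModule is any postprocessing module class
    # exposing a `test` attribute of constants and data.
    instance = SomeModule.test_instance()
    instance.process()  # run the module against its own test data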
Example #15
    def read(self):
        for _file in self.files:
            df = pd.read_csv(_file.filepath, index_col=[0], parse_dates=[0])
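            # Derive the sample frequency from the spacing of the first two
            # timestamps in the index.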
            _freq = int(1 / (df.index[1] - df.index[0]).total_seconds())
            for variable_name in df.columns:
                variable = DecadesVariable(df[variable_name],
                                           index=df.index,
                                           name=variable_name,
                                           long_name=variable_name,
                                           units='RAW',
                                           frequency=_freq,
                                           write=False,
                                           flag=None)

                _file.dataset.add_input(variable)
Example #16
    def read(self):
        for _file in self.files:
            logger.info(f'Reading {_file}...')
            with netCDF4.Dataset(_file.filepath) as nc:
                time = pd.DatetimeIndex(
                    netCDF4.num2date(nc['Time'][:],
                                     units=nc['Time'].units,
                                     only_use_cftime_datetimes=False,
                                     only_use_python_datetimes=True))

                for var in nc.variables:
                    if var.endswith('FLAG') or var == 'Time':
                        continue

                    # Get the frequency of the variable from the name of the
                    # second dimension. Ugly as sin!
                    try:
                        _freq = int(nc[var].dimensions[1].replace('sps', ''))
                    except IndexError:
                        _freq = 1

                    variable = DecadesVariable(
                        {var: nc[var][:].ravel()},
                        index=self._time_at(time, self._var_freq(nc[var])),
                        name=var,
                        write=False,
                        flag=self._flag_class(var, nc),
                        frequency=_freq,
                    )

                    self.flag(variable, nc)

                    for attr in nc[var].ncattrs():
                        # Horrible frequency special case, to cope with v004
                        # core data files, where frequency corresponds to some
                        # nominal measurement frequency, rather than sample
                        # freq in the file
                        if attr == 'frequency':
                            setattr(variable, attr, _freq)
                            continue
                        setattr(variable, attr, getattr(nc[var], attr))

                    _file.dataset.add_input(variable)
Example #17
def get_flagmod_doc(module):
    """
    Returns documentation for a given flagging module, as restructured text.

    Args:
        module: the flagging module to document, expected to be a subclass of 
            ppodd.flags.FlaggingBase.

    Returns:
        A restructured text string containing the module documentation.
    """
    index = module.test_index
    flag = module.test_flag
    d = DecadesDataset()
    for var in module.flagged:
        v = DecadesVariable(pd.Series(flag, index=index, name=var),
                            flag=DecadesBitmaskFlag)
        d.add_output(v)

    mod = module(d)
    mod._flag(test=True)

    output = "\n\n"
    output += '-' * len(module.__name__) + '\n'
    output += module.__name__ + '\n'
    output += '-' * len(module.__name__) + '\n\n'
    output += mod.__doc__

    for var, flag_infos in mod.flags.items():
        mod_var_txt = f'Flagged variable: `{var}`'
        output += '\n\n' + mod_var_txt + '\n'
        output += '-' * len(mod_var_txt) + '\n\n'

        for flag_info in flag_infos:
            output += f'* ``{flag_info[0]}`` - {flag_info[1]}\n'

    return output
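
A minimal usage sketch (SomeFlagModule is a hypothetical subclass of ppodd.flags.FlaggingBase): the returned string is plain reStructuredText and can be written straight into a Sphinx document.

    # Hypothetical usage: SomeFlagModule subclasses ppodd.flags.FlaggingBase.
    rst = get_flagmod_doc(SomeFlagModule)
    print(rst)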
Example #18
    def read(self):
        for _file in sorted(self.files,
                            key=lambda x: os.path.basename(x.filepath)):
            self.dataset = _file.dataset
            self._index_dict = {}

            definition = self._get_definition(_file)

            if definition is None:
                warnings.warn(
                    'No CRIO definition found for {}'.format(_file.filepath),
                    RuntimeWarning)
                continue

            dtypes = definition.dtypes

            logger.info('Reading {}...'.format(_file))
            _data = np.fromfile(_file.filepath, dtype=dtypes)

            _read_fail = False
            for d in _data:
                try:
                    data_id = d[0].decode('utf-8')
                except UnicodeDecodeError:
                    _read_fail = True
                    break

                data_id = data_id.replace('$', '')
                if data_id != definition.identifier:
                    _read_fail = True
                    break

            if _read_fail:
                del _data
                _data = self.scan(_file, definition)

            _time = _data[self.time_variable]

            # If there isn't any time info, then get out of here before we
            # raise an exception.
            if not len(_time):
                continue

            # Small amount of error tolerance. If there's a single dodgy
            # timestamp in between two otherwise OK timestamps, assume that
            # it's OK to interpolate across it
            _time = pd.Series(_time)
            _time.loc[(_time - _time.median()).abs() > C_BAD_TIME_DEV] = np.nan
            _time = _time.interpolate(limit=1).values

            _good_times = np.where(~np.isnan(_time))
            _time = _time[_good_times]
            if len(_time) < C_MIN_ARRAY_LEN:
                logger.info('Array shorter than minimum valid length.')
                continue

            for _name, _dtype in _data.dtype.fields.items():
                if _name[0] == '$':
                    continue
                if _name == self.time_variable:
                    continue

                # Pandas doesn't enjoy non-native endianness, so convert data
                # to system byteorder if required
                _var_dtype = _dtype[0].base
                if definition.get_field(_name).byte_order != sys.byteorder:
                    _var = _data[_name].byteswap().newbyteorder()
                    _var_dtype = _var_dtype.newbyteorder()
                else:
                    _var = _data[_name]

                if len(_var.shape) == 1:
                    _var = _var[_good_times]
                else:
                    _var = _var[_good_times, :]

                frequency, index = self._get_index(_var, _name, _time,
                                                   definition)

                # Define the decades variable
                dtd = self._get_group_name(definition)

                variable_name = '{}_{}'.format(dtd, _name)

                max_var_len = len(self._index_dict[frequency])
                if max_var_len != len(_var.ravel()):
                    logger.warning('index & variable len differ')
                    logger.warning(' -> ({})'.format(variable_name))

                _var = _var.ravel()[:max_var_len]

                variable = DecadesVariable(
                    {variable_name: _var.ravel()},
                    index=self._index_dict[frequency],
                    name=variable_name,
                    long_name=definition.get_field(_name).long_name,
                    units='RAW',
                    frequency=frequency,
                    tolerance=self.tolerance,
                    write=False,
                    flag=None,
                    # dtype=_var_dtype
                )

                _file.dataset.add_input(variable)
Example #19
    def _get_var_32(self):
        return DecadesVariable({TEST_VAR_NAME: TEST_VAR_VALUES * 32},
                               index=self.test_index_32)