def test_collection_values(self):
    """The `values` property yields each stored attribute's value."""
    a = AttributesCollection()
    a.add(Attribute('key1', 'value1'))
    a.add(Attribute('key2', 'value2'))
    # assertEquals is a deprecated alias, removed in Python 3.12
    self.assertEqual(len(a.values), 2)
    for value in a.values:
        self.assertIn(value, ('value1', 'value2'))
 def test_call_instance(self):
     a = AttributesCollection()
     a.add(Attribute('key1', 'value1'))
     a.add(Attribute('key2', 'value2'))
     d = a()
     for k, v in zip(('key1', 'key2'), ('value1', 'value2')):
         self.assertEquals(d[k], v)
    def test_collection_dict_property(self):
        """The `dict` property returns an alphabetically ordered mapping."""
        a = AttributesCollection()
        a.add(Attribute('key2', 'value2'))
        a.add(Attribute('key1', 'value1'))
        d = a.dict
        # assertEquals is a deprecated alias, removed in Python 3.12;
        # len(d) is equivalent to len(d.items()) and avoids building a view.
        self.assertEqual(len(d), 2)
        self.assertEqual(d['key1'], 'value1')
        self.assertEqual(d['key2'], 'value2')

        # Ordered dict should be alphabetical - reverse of the order added in
        # this case.
        self.assertEqual(list(d.keys())[0], 'key1')
 def test_required_attributes_from_definition(self):
     a = AttributesCollection(definition=DEF_PATH, )
     for key in ['Required', 'RequiredDefault']:
         self.assertIn(key, a.REQUIRED_ATTRIBUTES)
 def test_create_collection_with_path_definition(self):
     a = AttributesCollection(definition=DEF_PATH)
 def test_create_collection_with_no_definition(self):
     a = AttributesCollection()
 def test_get_item_non_compliancify(self):
     a = AttributesCollection()
     a.add(Attribute('key1', 'value1'))
     a.add(Attribute('key2', 'value2'))
     self.assertEquals(a['key1'], 'value1')
     self.assertEquals(a['key2'], 'value2')
# Example #8 (scraped snippet separator; trailing '0' was the site's vote count)
    def __init__(self, *args, **kwargs):
        """
        Initialize a class instance. Arbitrary args and kwargs are accepted.
        These will be passed to pandas during the initial creation of a
        DataFrame, other than keys which are defined in the `standard` and:

        Args:
            name (str, optional): the name of the variable. If not given, it
                is inferred from the first column of the created DataFrame.
            write (bool, optional): whether or not this variable should be
                written to file as an output. Default `True`.
            flag (Object, optional): a class to be used for holding data quality
                information. Default is `DecadesClassicFlag`. May be `None`,
                in which case no flag is attached to this variable.
            standard (str, optional): a metadata 'standard' which should
                be adhered to. Default is `faam_data`.
            strict (bool, optional): whether the `standard` should be strictly
                enforced. Default is `False`.
            tolerance (int, optional): tolerance to use when reindexing onto a
                regular index. Default is 0.
            dtype (optional): an explicit datatype to cast the data array to.
                Default is `None`, in which case the array is only downcast.
            flag_postfix (str, optional): the postfix to add to a
                variable when output to indicate it is a quality flag. Default
                is `FLAG`.
        """

        # Pop variable-specific options off kwargs so that only DataFrame
        # arguments remain for pandas below.
        _flag = kwargs.pop('flag', DecadesClassicFlag)
        _standard = kwargs.pop('standard', 'faam_data')
        _strict = kwargs.pop('strict', False)
        _tolerance = kwargs.pop('tolerance', 0)
        _flag_postfix = kwargs.pop('flag_postfix', 'FLAG')

        # Attribute metadata for this variable, validated against the
        # '<standard>.VariableAttributes' definition.
        self.attrs = AttributesCollection(dataset=self,
                                          definition='.'.join(
                                              (_standard,
                                               'VariableAttributes')),
                                          strict=_strict)
        self.dtype = kwargs.pop('dtype', None)
        self.name = kwargs.pop('name', None)
        self.write = kwargs.pop('write', True)

        # Set attributes given as keyword arguments
        _attrs = self.attrs.REQUIRED_ATTRIBUTES + self.attrs.OPTIONAL_ATTRIBUTES
        for _attr in _attrs:
            # Get the default value of an attribute, if it exists
            try:
                _default = self.attrs._definition.schema(
                )['properties'][_attr]['ppodd_default']
            except KeyError:
                _default = None

            # Pop the attribute off the keyword stack, and set it if it has a
            # value

            _context = None
            _context_type = None

            # Defaults of the form '<call hook>' are deferred: the hook name
            # replaces the default and this variable is stored as its
            # resolution context. Non-string defaults raise TypeError and
            # non-matching strings raise IndexError; both fall through so the
            # default is used literally.
            if _default:
                try:
                    rex = re.compile('^<call (.+)>$')
                    hook = rex.findall(_default)[0]
                    _default = [i.strip() for i in hook.strip().split()][0]
                    _context = self
                    _context_type = Context.ATTR
                except (TypeError, IndexError):
                    pass

            _val = kwargs.pop(_attr, _default)
            if _val is not None:
                self.attrs.add(
                    Attribute(_attr,
                              _val,
                              context=_context,
                              context_type=_context_type))

        # Create an interim DataFrame, and infer its frequency
        _df = pd.DataFrame(*args, **kwargs)
        _freq = self._get_freq(df=_df)

        # Create an index spanning the variable value at the correct frequency
        _index = pd.date_range(start=_df.index[0],
                               end=_df.index[-1],
                               freq=_freq)

        # If no variable name is given, infer it from the first column of the
        # dataframe
        if self.name is None:
            self.name = _df.columns[0]

        # Deal with non-unique entries in the dataframe, by selecting the last
        # element
        if len(_df.index) != len(_df.index.unique()):
            _df = _df.groupby(_df.index).last()

        # Ensure input is monotonic
        _df = _df.sort_index()

        # Create the data array that we're going to keep. We're going to
        # reindex the dataframe onto the complete index, and downcast it to the
        # smallest reasonable datatype. This is a memory saving trick, as we
        # don't have to have a 64-bit index associated with every variable.
        array = self._downcast(
            np.array(
                _df.reindex(_index,
                            tolerance=_tolerance,
                            method='nearest',
                            limit=1).values.flatten()))
        if self.dtype:
            array = array.astype(self.dtype)
        self.array = array

        # t0 and t1 are the start and end times of the array, which we're going
        # to store as we dispose of the index
        self.t0 = _index[0]
        self.t1 = _index[-1]

        # Create the QC Flag array, and add the name of the flag variable to
        # the 'ancillary_variables' attribute. TODO: should we check if this
        # attribute already exists?
        if _flag is not None:
            self.flag = _flag(self, postfix=_flag_postfix)
            self.attrs.add(
                Attribute('ancillary_variables',
                          f'{self.name}_{_flag_postfix}'))
        else:
            # NOTE(review): with no flag class, a '_CU' variable still points
            # its ancillary_variables at the parent variable's flag —
            # presumably a naming convention for derived variables; confirm.
            if self.name.endswith('_CU'):
                self.attrs.add(
                    Attribute('ancillary_variables',
                              f'{self.name[:-3]}_{_flag_postfix}'))
            self.flag = _flag
 def test_add_attribute(self):
     a = AttributesCollection()
     a.add(Attribute('key', 'value'))
     self.assertIn('key', [i.key for i in a._attributes])
 def test_remove_attribute_by_key(self):
     a = AttributesCollection()
     a.add(Attribute('key', 'value'))
     self.assertEquals(a['key'], 'value')
     a.remove('key')
     self.assertRaises(KeyError, lambda: a['key'])
 def test_set_item_as_dict(self):
     a = AttributesCollection()
     a['key'] = {'level1': 'value1', 'level2': 'value2'}
     self.assertEquals(a['key_level1'], 'value1')
     self.assertEquals(a['key_level2'], 'value2')
 def test_set_item(self):
     a = AttributesCollection()
     a['key1'] = 'value'
     self.assertEquals(a['key1'], 'value')
 def test_add_invalid_key_in_strict_mode(self):
     a = AttributesCollection(definition=Attributes)
     self.assertRaises(NonStandardAttributeError,
                       lambda: a.add(Attribute('invalid', 'invalid')))
 def test_add_duplicate_key(self):
     a = AttributesCollection()
     a.add(Attribute('key1', 'value1'))
     a.add(Attribute('key1', 'value2'))
     self.assertEquals(a['key1'], 'value2')
 def test_option_attributes_from_definition(self):
     a = AttributesCollection(definition=DEF_PATH)
     self.assertIn('OptionalAttr', a.OPTIONAL_ATTRIBUTES)
 def test_add_data_attribute(self):
     a = AttributesCollection()
     a.add(Attribute('key', lambda: 'value'))
     self.assertEquals(a['key'], 'value')
 def test_required_attributes_with_no_definition(self):
     a = AttributesCollection()
     self.assertEquals(a.REQUIRED_ATTRIBUTES, [])
 def test_static_items(self):
     a = AttributesCollection()
     a.add(Attribute('key1', lambda: 'value1'))
     a.add(Attribute('key2', 'value2'))
     self.assertEquals(len(a.static_items()), 1)
# Example #19 (scraped snippet separator; trailing '0' was the site's vote count)
    def __init__(self,
                 date=None,
                 backend=DefaultBackend,
                 writer=NetCDFWriter,
                 pp_group='core',
                 standard='faam_data',
                 strict=True,
                 logfile=None):
        """
        Create a class instance.

        Args:
            date (datetime.datetime, optional): a date representing the date of
                the flight. Default is None, in which case a date is expected
                via a constants file.
            standard (str, optional): the metadata standard to adhere to.
                Default is `faam_data`.
            strict (bool, optional): indicates whether the <standard> should be
                strictly enforced. Default is True.
            backend (ppodd.decades.backends.DecadesBackend): the backend to use
                for variable storage. Default is
                ppodd.decades.backends.DefaultBackend.
            pp_group (str, optional): a string indicating which group
                of postprocessing modules should be run. Default is `core`.
            writer (ppodd.writers.writers.DecadesWriter): the writer class to
                use by default. Default is NetCDFWriter.
            logfile (optional): accepted but not referenced in this
                initializer — presumably consumed elsewhere; TODO confirm.
        """

        self._date = date
        self.lazy = Lazy(self)
        # Input bookkeeping: file readers, flight definitions, constants.
        self.readers = []
        self.definitions = []
        self.constants = {}
        # Per-run modifications to variables / flags, and module exclusions.
        self._variable_mods = {}
        self._flag_mods = {}
        self._mod_exclusions = []
        # Postprocessing, QA and flagging module queues and run results.
        self.pp_modules = []
        self.qa_modules = []
        self.flag_modules = []
        self.completed_modules = []
        self.failed_modules = []
        self._attribute_helpers = []
        self.writer = writer
        self.pp_group = pp_group

        self._dataframes = {}
        self.lon = None
        self.lat = None
        self._garbage_collect = False
        self._qa_dir = None

        self._decache = False
        self._trim = False
        self._standard = standard
        self._strict_mode = strict
        self.allow_overwrite = False
        # Instantiate the backend class to hold variable data.
        self._backend = backend()

        # Global (file-level) attributes, validated against the
        # '<standard>.GlobalAttributes' definition.
        self.globals = AttributesCollection(dataset=self,
                                            definition='.'.join(
                                                (standard,
                                                 'GlobalAttributes')),
                                            strict=strict)

        # attribute_helpers is a module-level collection of helper classes;
        # each is instantiated against this dataset.
        for helper in attribute_helpers:
            self._attribute_helpers.append(helper(self))
 def test_optional_attributes_with_no_definition(self):
     a = AttributesCollection()
     self.assertEquals(a.OPTIONAL_ATTRIBUTES, [])