Example #1
    def _find_files_to_backup(self):
        """
        Query the database for nodes that were created after the
        start of the last backup. Return a status code and the query sets.
        """
        # Go a bit further back to avoid any rounding problems. Set the
        # smallest timestamp to be backed up.
        start_of_backup = (
            self._oldest_object_bk -
            datetime.timedelta(minutes=self._additional_back_time_mins))

        # Find the end of backup for this round using the given _periodicity.
        backup_end_for_this_round = (
            self._oldest_object_bk +
            datetime.timedelta(days=self._periodicity))

        # If the end of the backup is after the end date given by the user,
        # adapt it accordingly
        if (self._internal_end_date_of_backup is not None and
                backup_end_for_this_round > self._internal_end_date_of_backup):
            backup_end_for_this_round = self._internal_end_date_of_backup

        # If the end of the backup is after the current time, adapt the end accordingly
        now_timestamp = datetime.datetime.now(dtimezone.get_current_timezone())
        if backup_end_for_this_round > now_timestamp:
            self._logger.info(
                'We cannot back up until %s. We will back up until now (%s).',
                backup_end_for_this_round, now_timestamp)
            backup_end_for_this_round = now_timestamp

        # Check if the backup length is below the backup length threshold
        if backup_end_for_this_round - start_of_backup < \
                self._backup_length_threshold:
            self._logger.info(
                'Backup (timestamp) length is below the given threshold. Backup finished'
            )
            return -1, None

        # Construct the queries & query sets
        query_sets = self._get_query_sets(start_of_backup,
                                          backup_end_for_this_round)

        # Set the new start of the backup
        self._oldest_object_bk = backup_end_for_this_round

        # Check if threshold is 0
        if self._backup_length_threshold == datetime.timedelta(hours=0):
            return -2, query_sets

        return 0, query_sets
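
A minimal, hypothetical driver loop (the run_backup_rounds caller and the _backup_query_set helper are assumptions, not part of the source) showing how the (-1, None), (-2, query_sets) and (0, query_sets) return values above could be consumed:

def run_backup_rounds(backup):
    """Hypothetical sketch: run backup rounds until nothing is left to do."""
    while True:
        status, query_sets = backup._find_files_to_backup()
        if status == -1:
            # The remaining window is below the length threshold: finished.
            break
        for query_set in query_sets:
            backup._backup_query_set(query_set)  # hypothetical helper
        if status == -2:
            # Zero threshold: a single round covers the whole requested range.
            break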
Example #2
    @classmethod
    def create_value(cls,
                     key,
                     value,
                     subspecifier_value=None,
                     other_attribs=None):
        """
        Create a new list of attributes, without storing them, associated
        with the current key/value pair (and to the given subspecifier,
        e.g. the DbNode for DbAttributes and DbExtras).

        :note: No queries are made to the DB; in particular, no check is
          done on the existence of the given nodes.

        :param key: a string with the key to create (can contain the
          separator cls._sep if this is a sub-attribute: indeed, this
          function calls itself recursively)
        :param value: the value to store (a basic data type or a list or a dict)
        :param subspecifier_value: must be None if this class has no
          subspecifier set (e.g., the DbSetting class).
          Must be the value of the subspecifier (e.g., the dbnode) for classes
          that define it (e.g. DbAttribute and DbExtra)
        :param other_attribs: a dictionary of other parameters, to store
          only on the level-zero attribute (e.g. for description in DbSetting).

        :return: always a list of class instances; it is the user's
          responsibility to store such entries (typically with a Django
          bulk_create() call).
        """
        import datetime

        from aiida.common import json
        from aiida.common.timezone import is_naive, make_aware, get_current_timezone

        # other_attribs defaults to None to avoid the shared mutable-default
        # pitfall; normalize it to an empty dict here.
        if other_attribs is None:
            other_attribs = {}

        if cls._subspecifier_field_name is None:
            if subspecifier_value is not None:
                raise ValueError('You cannot specify a subspecifier value for '
                                 'class {} because it has no subspecifiers'
                                 ''.format(cls.__name__))
            if issubclass(cls, DbAttributeFunctionality):
                new_entry = db_attribute_base_model(key=key, **other_attribs)
            else:
                new_entry = db_extra_base_model(key=key, **other_attribs)
        else:
            if subspecifier_value is None:
                raise ValueError(
                    'You also have to specify a subspecifier value '
                    'for class {} (the {})'.format(
                        cls.__name__, cls._subspecifier_field_name))
            further_params = other_attribs.copy()
            further_params.update(
                {cls._subspecifier_field_name: subspecifier_value})
            if issubclass(cls, DbAttributeFunctionality):
                new_entry = db_attribute_base_model(key=key, **further_params)
            else:
                new_entry = db_extra_base_model(key=key, **further_params)

        list_to_return = [new_entry]

        if value is None:
            new_entry.datatype = 'none'
            new_entry.bval = None
            new_entry.tval = ''
            new_entry.ival = None
            new_entry.fval = None
            new_entry.dval = None

        elif isinstance(value, bool):
            new_entry.datatype = 'bool'
            new_entry.bval = value
            new_entry.tval = ''
            new_entry.ival = None
            new_entry.fval = None
            new_entry.dval = None

        elif isinstance(value, six.integer_types):
            new_entry.datatype = 'int'
            new_entry.ival = value
            new_entry.tval = ''
            new_entry.bval = None
            new_entry.fval = None
            new_entry.dval = None

        elif isinstance(value, float):
            new_entry.datatype = 'float'
            new_entry.fval = value
            new_entry.tval = ''
            new_entry.ival = None
            new_entry.bval = None
            new_entry.dval = None

        elif isinstance(value, six.string_types):
            new_entry.datatype = 'txt'
            new_entry.tval = value
            new_entry.bval = None
            new_entry.ival = None
            new_entry.fval = None
            new_entry.dval = None

        elif isinstance(value, datetime.datetime):

            # current timezone is taken from the settings file of django
            if is_naive(value):
                value_to_set = make_aware(value, get_current_timezone())
            else:
                value_to_set = value

            new_entry.datatype = 'date'
            # TODO: time-aware and time-naive datetime objects, see
            # https://docs.djangoproject.com/en/dev/topics/i18n/timezones/#naive-and-aware-datetime-objects
            new_entry.dval = value_to_set
            new_entry.tval = ''
            new_entry.bval = None
            new_entry.ival = None
            new_entry.fval = None

        elif isinstance(value, (list, tuple)):

            new_entry.datatype = 'list'
            new_entry.dval = None
            new_entry.tval = ''
            new_entry.bval = None
            new_entry.ival = len(value)
            new_entry.fval = None

            for i, subv in enumerate(value):
                # I do not need get_or_create here, because
                # above I deleted all children (and I
                # expect no concurrency)
                # NOTE: I do not pass other_attribs
                list_to_return.extend(
                    cls.create_value(key=('{}{}{:d}'.format(key, cls._sep, i)),
                                     value=subv,
                                     subspecifier_value=subspecifier_value))

        elif isinstance(value, dict):

            new_entry.datatype = 'dict'
            new_entry.dval = None
            new_entry.tval = ''
            new_entry.bval = None
            new_entry.ival = len(value)
            new_entry.fval = None

            for subk, subv in value.items():
                cls.validate_key(subk)

                # I do not need get_or_create here, because
                # above I deleted all children (and I
                # expect no concurrency)
                # NOTE: I do not pass other_attribs
                list_to_return.extend(
                    cls.create_value(key='{}{}{}'.format(key, cls._sep, subk),
                                     value=subv,
                                     subspecifier_value=subspecifier_value))
        else:
            try:
                jsondata = json.dumps(value)
            except TypeError:
                raise ValueError(
                    'Unable to store the value: it must be either a basic datatype, or json-serializable: {}'
                    .format(value))

            new_entry.datatype = 'json'
            new_entry.tval = jsondata
            new_entry.bval = None
            new_entry.ival = None
            new_entry.fval = None

        return list_to_return
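
A short usage sketch under the snippet's own names, assuming create_value is reachable from DbAttributeFunctionality and that node is an existing DbNode instance (both assumptions, not shown in the source):

# Build the attribute rows for a nested value without touching the DB.
entries = DbAttributeFunctionality.create_value(
    key='parameters',
    value={'cutoff': 30.0, 'kpoints': [4, 4, 4]},
    subspecifier_value=node,  # assumed existing DbNode instance
)
# Nothing has been stored yet, as the docstring notes; persist in one query:
# db_attribute_base_model.objects.bulk_create(entries)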
Example #3
def _deserialize_attribute(mainitem,
                           subitems,
                           sep,
                           original_class=None,
                           original_pk=None,
                           lesserrors=False):
    """Deserialize a single attribute.

    :param mainitem: the main item (either the attribute itself for base
      types (None, string, ...) or the main item for lists and dicts.
      Must contain the 'key' key and also the following keys:
      datatype, tval, fval, ival, bval, dval.
      NOTE that a type check is not performed! tval is expected to be a string,
      dval a date, etc.
    :param subitems: must be a dictionary of dictionaries. In the top-level dictionary,
      the key must be the key of the attribute, stripped of all prefixes
      (i.e., if the mainitem has key 'a.b' and we pass subitems
      'a.b.0', 'a.b.1', 'a.b.1.c', their keys must be '0', '1', '1.c').
      It must be None if the value is not iterable (int, str,
      float, ...).
      It is an empty dictionary if there are no subitems.
    :param sep: a string, the separator between subfields (to separate the
      name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
      of DbMultipleValueAttributeBaseClass, pass here the class (note: the class,
      not the instance!). This is used only in case the wrong number of elements
      is found in the raw data, to print a more meaningful message (if the class
      has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass
      of DbMultipleValueAttributeBaseClass that has a dbnode associated to it,
      pass here the PK integer. This is used only in case the wrong number
      of elements is found in the raw data, to print a more meaningful message
    :param lesserrors: If set to True, in some cases where the content of the
      DB is not consistent but data is still recoverable,
      it will just log the message rather than raising
      an exception (e.g. if the number of elements of a dictionary is different
      from the number declared in the ival field).

    :return: the deserialized value
    :raise aiida.backends.djsite.db.migrations.DeserializationException: if an error occurs"""

    from aiida.common import json
    from aiida.common.timezone import (is_naive, make_aware,
                                       get_current_timezone)

    if mainitem['datatype'] in ['none', 'bool', 'int', 'float', 'txt']:
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return _deserialize_basic_type(mainitem)

    if mainitem['datatype'] == 'date':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        if is_naive(mainitem['dval']):
            return make_aware(mainitem['dval'], get_current_timezone())
        return mainitem['dval']

    if mainitem['datatype'] == 'list':
        return deserialize_list(mainitem, subitems, sep, original_class,
                                original_pk, lesserrors)
    if mainitem['datatype'] == 'dict':
        return deserialize_dict(mainitem, subitems, sep, original_class,
                                original_pk, lesserrors)
    if mainitem['datatype'] == 'json':
        try:
            return json.loads(mainitem['tval'])
        except ValueError:
            raise DeserializationException(
                'Error in the content of the json field')
    else:
        raise DeserializationException(
            "The type field '{}' is not recognized".format(
                mainitem['datatype']))
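
The base-type branch above dispatches to _deserialize_basic_type, which the snippet does not show; judging from Example #4 below, it presumably maps the datatype to the matching value column. A sketch under that assumption:

def _deserialize_basic_type(mainitem):
    """Sketch (assumed implementation): pick the column matching datatype,
    mirroring the explicit branches in Example #4 below."""
    if mainitem['datatype'] == 'none':
        return None
    return {
        'bool': mainitem['bval'],
        'int': mainitem['ival'],
        'float': mainitem['fval'],
        'txt': mainitem['tval'],
    }[mainitem['datatype']]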
Example #4
def _deserialize_attribute(mainitem,
                           subitems,
                           sep,
                           original_class=None,
                           original_pk=None,
                           lesserrors=False):
    """
    Deserialize a single attribute.

    :param mainitem: the main item (either the attribute itself for base
      types (None, string, ...) or the main item for lists and dicts.
      Must contain the 'key' key and also the following keys:
      datatype, tval, fval, ival, bval, dval.
      NOTE that a type check is not performed! tval is expected to be a string,
      dval a date, etc.
    :param subitems: must be a dictionary of dictionaries. In the top-level dictionary,
      the key must be the key of the attribute, stripped of all prefixes
      (i.e., if the mainitem has key 'a.b' and we pass subitems
      'a.b.0', 'a.b.1', 'a.b.1.c', their keys must be '0', '1', '1.c').
      It must be None if the value is not iterable (int, str,
      float, ...).
      It is an empty dictionary if there are no subitems.
    :param sep: a string, the separator between subfields (to separate the
      name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
      of DbMultipleValueAttributeBaseClass, pass here the class (note: the class,
      not the instance!). This is used only in case the wrong number of elements
      is found in the raw data, to print a more meaningful message (if the class
      has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass
      of DbMultipleValueAttributeBaseClass that has a dbnode associated to it,
      pass here the PK integer. This is used only in case the wrong number
      of elements is found in the raw data, to print a more meaningful message
    :param lesserrors: If set to True, in some cases where the content of the
      DB is not consistent but data is still recoverable,
      it will just log the message rather than raising
      an exception (e.g. if the number of elements of a dictionary is different
      from the number declared in the ival field).

    :return: the deserialized value
    :raise aiida.backends.djsite.db.migrations.DeserializationException: if an error occurs
    """
    from aiida.common import json
    from aiida.common.timezone import (is_naive, make_aware,
                                       get_current_timezone)

    from aiida.common import AIIDA_LOGGER

    if mainitem['datatype'] == 'none':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return None
    elif mainitem['datatype'] == 'bool':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return mainitem['bval']
    elif mainitem['datatype'] == 'int':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return mainitem['ival']
    elif mainitem['datatype'] == 'float':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return mainitem['fval']
    elif mainitem['datatype'] == 'txt':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        return mainitem['tval']
    elif mainitem['datatype'] == 'date':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem.key))
        if is_naive(mainitem['dval']):
            return make_aware(mainitem['dval'], get_current_timezone())
        else:
            return mainitem['dval']

    elif mainitem['datatype'] == 'list':
        # subitems contains all subitems; here I keep only those at
        # depth 1, i.e. if I have subitems '0', '1' and '1.c' I
        # keep only '0' and '1'
        firstlevelsubdict = {k: v for k, v in subitems.items() if sep not in k}

        # For checking, I verify the expected values
        expected_set = set(['{:d}'.format(i) for i in range(mainitem['ival'])])
        received_set = set(firstlevelsubdict.keys())
        # If any of the expected entries are missing, the data cannot be
        # recovered and deserialization fails outright.

        if not expected_set.issubset(received_set):
            if (original_class is not None
                    and original_class._subspecifier_field_name is not None):
                subspecifier_string = '{}={} and '.format(
                    original_class._subspecifier_field_name, original_pk)
            else:
                subspecifier_string = ''
            if original_class is None:
                sourcestr = 'the data passed'
            else:
                sourcestr = original_class.__name__

            raise DeserializationException(
                'Wrong list elements stored in {} for '
                "{}key='{}' ({} vs {})".format(sourcestr, subspecifier_string,
                                               mainitem['key'], expected_set,
                                               received_set))
        # If there are extra entries beyond the expected ones (but all
        # expected ones are present), log an error instead of raising
        # when lesserrors is set.
        if expected_set != received_set:
            if (original_class is not None
                    and original_class._subspecifier_field_name is not None):
                subspecifier_string = '{}={} and '.format(
                    original_class._subspecifier_field_name, original_pk)
            else:
                subspecifier_string = ''
            if original_class is None:
                sourcestr = 'the data passed'
            else:
                sourcestr = original_class.__name__

            msg = ('Wrong list elements stored in {} for '
                   "{}key='{}' ({} vs {})".format(sourcestr,
                                                  subspecifier_string,
                                                  mainitem['key'],
                                                  expected_set, received_set))
            if lesserrors:
                AIIDA_LOGGER.error(msg)
            else:
                raise DeserializationException(msg)

        # I get the values in memory as a dictionary
        tempdict = {}
        for firstsubk, firstsubv in firstlevelsubdict.items():
            # I call recursively the same function to get subitems
            newsubitems = {
                k[len(firstsubk) + len(sep):]: v
                for k, v in subitems.items() if k.startswith(firstsubk + sep)
            }
            tempdict[firstsubk] = _deserialize_attribute(
                mainitem=firstsubv,
                subitems=newsubitems,
                sep=sep,
                original_class=original_class,
                original_pk=original_pk)

        # And then I put them in a list
        retlist = [tempdict['{:d}'.format(i)] for i in range(mainitem['ival'])]
        return retlist
    elif mainitem['datatype'] == 'dict':
        # subitems contains all subitems; here I keep only those at
        # depth 1, i.e. if I have subitems '0', '1' and '1.c' I
        # keep only '0' and '1'
        firstlevelsubdict = {k: v for k, v in subitems.items() if sep not in k}

        if len(firstlevelsubdict) != mainitem['ival']:
            if (original_class is not None
                    and original_class._subspecifier_field_name is not None):
                subspecifier_string = '{}={} and '.format(
                    original_class._subspecifier_field_name, original_pk)
            else:
                subspecifier_string = ''
            if original_class is None:
                sourcestr = 'the data passed'
            else:
                sourcestr = original_class.__name__

            msg = ('Wrong dict length stored in {} for '
                   "{}key='{}' ({} vs {})".format(sourcestr,
                                                  subspecifier_string,
                                                  mainitem['key'],
                                                  len(firstlevelsubdict),
                                                  mainitem['ival']))
            if lesserrors:
                AIIDA_LOGGER.error(msg)
            else:
                raise DeserializationException(msg)

        # I get the values in memory as a dictionary
        tempdict = {}
        for firstsubk, firstsubv in firstlevelsubdict.items():
            # I call recursively the same function to get subitems
            newsubitems = {
                k[len(firstsubk) + len(sep):]: v
                for k, v in subitems.items() if k.startswith(firstsubk + sep)
            }
            tempdict[firstsubk] = _deserialize_attribute(
                mainitem=firstsubv,
                subitems=newsubitems,
                sep=sep,
                original_class=original_class,
                original_pk=original_pk)

        return tempdict
    elif mainitem['datatype'] == 'json':
        try:
            return json.loads(mainitem['tval'])
        except ValueError:
            raise DeserializationException(
                'Error in the content of the json field')
    else:
        raise DeserializationException(
            "The type field '{}' is not recognized".format(
                mainitem['datatype']))
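
A small hand-written round trip illustrates the expected shape of mainitem and subitems (sub-keys stripped of the parent prefix), here for a list [10, 'x'] stored under key 'a':

mainitem = {'key': 'a', 'datatype': 'list', 'ival': 2,
            'tval': '', 'bval': None, 'fval': None, 'dval': None}
subitems = {
    '0': {'key': 'a.0', 'datatype': 'int', 'ival': 10,
          'tval': '', 'bval': None, 'fval': None, 'dval': None},
    '1': {'key': 'a.1', 'datatype': 'txt', 'tval': 'x',
          'bval': None, 'ival': None, 'fval': None, 'dval': None},
}
assert _deserialize_attribute(mainitem, subitems, sep='.') == [10, 'x']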
Example #5
# Keep DEBUG = False! Otherwise every query is stored in memory
DEBUG = False

ADMINS = []
ALLOWED_HOSTS = []

MANAGERS = ADMINS

# Language code for this installation. All choices can be found here:
# http://www.i18nguy.com/unicode/language-identifiers.html
LANGUAGE_CODE = 'en-us'

# Local time zone for this installation. Always choose the system timezone.
# Note: This causes django to set the 'TZ' environment variable, which is read by tzlocal from then onwards.
# See https://docs.djangoproject.com/en/2.2/ref/settings/#std:setting-TIME_ZONE
TIME_ZONE = get_current_timezone().zone

SITE_ID = 1

# If you set this to False, Django will make some optimizations so as not
# to load the internationalization machinery.
USE_I18N = False

# If you set this to False, Django will not format dates, numbers and
# calendars according to the current locale.
USE_L10N = False

# If you set this to False, Django will not use timezone-aware datetimes.
# For AiiDA, leave it as True, otherwise setting properties with dates will not work.
USE_TZ = settings.USE_TZ
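
As a quick illustration of why USE_TZ matters for the attribute code above: with it enabled, Django hands back timezone-aware datetimes, which the 'date' handling in the earlier examples relies on.

# Illustrative check (requires a configured Django environment):
from django.utils import timezone

now = timezone.now()
assert now.tzinfo is not None  # aware, not naive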
Example #6
    def _read_backup_info_from_dict(self, backup_variables):  # pylint: disable=too-many-branches,too-many-statements
        """
        This method reads the backup information from the given dictionary and
        sets the needed class variables.
        """
        # Setting the oldest backup date. This will be used as start of
        # the new backup procedure.
        #
        # If the oldest backup date is not set, then find the oldest
        # creation timestamp and set it as the oldest backup date.
        if backup_variables.get(self.OLDEST_OBJECT_BK_KEY) is None:
            query_node_res = self._query_first_node()

            if not query_node_res:
                self._logger.error('The oldest modification date was not found.')
                raise BackupError('The oldest modification date was not found.')

            # The check above guarantees that query_node_res is non-empty.
            self._oldest_object_bk = query_node_res[0].ctime
            self._logger.info(
                'Setting the oldest modification date to the creation date of the oldest object '
                '(%s)', self._oldest_object_bk
            )

        # If the oldest backup date is not None then try to parse it
        else:
            try:
                self._oldest_object_bk = parse(backup_variables.get(self.OLDEST_OBJECT_BK_KEY))
                if self._oldest_object_bk.tzinfo is None:
                    curr_timezone = dtimezone.get_current_timezone()
                    self._oldest_object_bk = curr_timezone.localize(self._oldest_object_bk)
                    self._logger.info(
                        'No timezone defined in the oldest modification date timestamp. Setting current timezone (%s).',
                        curr_timezone.zone
                    )
            # If it is not parsable...
            except ValueError:
                self._logger.error('We did not manage to parse the start timestamp of the last backup.')
                raise

        # Setting the backup directory & normalizing it
        self._backup_dir = os.path.normpath(backup_variables.get(self.BACKUP_DIR_KEY))
        if (not self._ignore_backup_dir_existence_check and not os.path.isdir(self._backup_dir)):
            self._logger.error('The given backup directory does not exist.')
            raise BackupError('The given backup directory does not exist.')

        # An end-of-backup date and a number of days to back up cannot
        # both be set.
        if (
            backup_variables.get(self.DAYS_TO_BACKUP_KEY) is not None and
            backup_variables.get(self.END_DATE_OF_BACKUP_KEY) is not None
        ):
            self._logger.error('Only one end of backup date can be set.')
            raise BackupError('Only one backup end can be set (date or days from backup start).')

        # Check if there is an end-of-backup date
        elif backup_variables.get(self.END_DATE_OF_BACKUP_KEY) is not None:
            try:
                self._end_date_of_backup = parse(backup_variables.get(self.END_DATE_OF_BACKUP_KEY))

                if self._end_date_of_backup.tzinfo is None:
                    curr_timezone = dtimezone.get_current_timezone()
                    self._end_date_of_backup = \
                        curr_timezone.localize(
                            self._end_date_of_backup)
                    self._logger.info(
                        'No timezone defined in the end date of backup timestamp. Setting current timezone (%s).',
                        curr_timezone.zone
                    )

                self._internal_end_date_of_backup = self._end_date_of_backup
            except ValueError:
                self._logger.error('The end date of the backup could not be parsed correctly')
                raise

        # Check if a number of days to back up is defined
        elif backup_variables.get(self.DAYS_TO_BACKUP_KEY) is not None:
            try:
                self._days_to_backup = int(backup_variables.get(self.DAYS_TO_BACKUP_KEY))
                self._internal_end_date_of_backup = (
                    self._oldest_object_bk + datetime.timedelta(days=self._days_to_backup)
                )
            except ValueError:
                self._logger.error('The days to backup should be an integer')
                raise
        # If the backup end is not set, then the ending date remains open

        # Parse the backup periodicity.
        try:
            self._periodicity = int(backup_variables.get(self.PERIODICITY_KEY))
        except ValueError:
            self._logger.error('The backup periodicity should be an integer')
            raise

        # Parse the backup length threshold
        try:
            hours_th = int(backup_variables.get(self.BACKUP_LENGTH_THRESHOLD_KEY))
            self._backup_length_threshold = datetime.timedelta(hours=hours_th)
        except ValueError:
            self._logger.error('The backup length threshold should be an integer')
            raise
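
A hypothetical configuration dict for this method; the key constants are taken from the class itself since their concrete string values are not shown in the snippet, and backup stands for an instance of the backup class:

backup_variables = {
    backup.OLDEST_OBJECT_BK_KEY: '2019-01-01 00:00:00',
    backup.BACKUP_DIR_KEY: '/home/user/backups',   # must exist on disk
    backup.DAYS_TO_BACKUP_KEY: None,
    backup.END_DATE_OF_BACKUP_KEY: None,           # end date stays open
    backup.PERIODICITY_KEY: 14,
    backup.BACKUP_LENGTH_THRESHOLD_KEY: 1,
}
backup._read_backup_info_from_dict(backup_variables)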