def _find_files_to_backup(self):
    """Query the database for nodes created after the start of the last backup.

    :return: a tuple ``(code, query_sets)`` where ``code`` is 0 on success,
        -1 when the backup window is shorter than the configured threshold
        (``query_sets`` is then ``None``), and -2 when the threshold is zero.
    """
    # Start slightly before the last backup point so that timestamp
    # rounding cannot make us skip records.
    window_start = (
        self._oldest_object_bk -
        datetime.timedelta(minutes=self._additional_back_time_mins))

    # The nominal end of this round is one periodicity after the bookmark.
    window_end = (
        self._oldest_object_bk +
        datetime.timedelta(days=self._periodicity))

    # Never extend past the user-provided end date, if one was given.
    if self._internal_end_date_of_backup is not None:
        window_end = min(window_end, self._internal_end_date_of_backup)

    # Never extend past the current time either.
    current_time = datetime.datetime.now(dtimezone.get_current_timezone())
    if window_end > current_time:
        self._logger.info(
            'We can not backup until %s. We will backup until now (%s).',
            window_end, current_time)
        window_end = current_time

    # A window below the threshold means there is nothing worth doing.
    if window_end - window_start < self._backup_length_threshold:
        self._logger.info(
            'Backup (timestamp) length is below the given threshold. Backup finished'
        )
        return -1, None

    # Build the query sets over the chosen window.
    query_sets = self._get_query_sets(window_start, window_end)

    # Advance the bookmark for the next round.
    self._oldest_object_bk = window_end

    # A zero threshold is signalled separately to the caller.
    if self._backup_length_threshold == datetime.timedelta(hours=0):
        return -2, query_sets

    return 0, query_sets
def create_value(cls, key, value, subspecifier_value=None, other_attribs=None):
    """Create a new list of attributes, without storing them, associated
    with the current key/value pair (and to the given subspecifier,
    e.g. the DbNode for DbAttributes and DbExtras).

    :note: No hits are done on the DB, in particular no check is done
        on the existence of the given nodes.

    :param key: a string with the key to create (can contain the
        separator cls._sep if this is a sub-attribute: indeed, this
        function calls itself recursively)
    :param value: the value to store (a basic data type or a list or a dict)
    :param subspecifier_value: must be None if this class has no
        subspecifier set (e.g., the DbSetting class).
        Must be the value of the subspecifier (e.g., the dbnode) for classes
        that define it (e.g. DbAttribute and DbExtra)
    :param other_attribs: a dictionary of other parameters, to store
        only on the level-zero attribute (e.g. for description in DbSetting).
        Defaults to an empty dictionary when not given.

    :return: always a list of class instances; it is the user
        responsibility to store such entries (typically with a Django
        bulk_create() call).
    """
    import datetime

    from aiida.common import json
    from aiida.common.timezone import is_naive, make_aware, get_current_timezone

    # Avoid the mutable-default-argument pitfall: build a fresh dict per call.
    if other_attribs is None:
        other_attribs = {}

    if cls._subspecifier_field_name is None:
        if subspecifier_value is not None:
            raise ValueError('You cannot specify a subspecifier value for '
                             'class {} because it has no subspecifiers'
                             ''.format(cls.__name__))
        if issubclass(cls, DbAttributeFunctionality):
            new_entry = db_attribute_base_model(key=key, **other_attribs)
        else:
            new_entry = db_extra_base_model(key=key, **other_attribs)
    else:
        if subspecifier_value is None:
            raise ValueError(
                'You also have to specify a subspecifier value '
                'for class {} (the {})'.format(
                    cls.__name__, cls._subspecifier_field_name))
        further_params = other_attribs.copy()
        further_params.update(
            {cls._subspecifier_field_name: subspecifier_value})
        if issubclass(cls, DbAttributeFunctionality):
            new_entry = db_attribute_base_model(key=key, **further_params)
        else:
            new_entry = db_extra_base_model(key=key, **further_params)

    list_to_return = [new_entry]

    # NOTE: the bool check must come before the int one, because in Python
    # bool is a subclass of int.
    if value is None:
        new_entry.datatype = 'none'
        new_entry.bval = None
        new_entry.tval = ''
        new_entry.ival = None
        new_entry.fval = None
        new_entry.dval = None
    elif isinstance(value, bool):
        new_entry.datatype = 'bool'
        new_entry.bval = value
        new_entry.tval = ''
        new_entry.ival = None
        new_entry.fval = None
        new_entry.dval = None
    elif isinstance(value, six.integer_types):
        new_entry.datatype = 'int'
        new_entry.ival = value
        new_entry.tval = ''
        new_entry.bval = None
        new_entry.fval = None
        new_entry.dval = None
    elif isinstance(value, float):
        new_entry.datatype = 'float'
        new_entry.fval = value
        new_entry.tval = ''
        new_entry.ival = None
        new_entry.bval = None
        new_entry.dval = None
    elif isinstance(value, six.string_types):
        new_entry.datatype = 'txt'
        new_entry.tval = value
        new_entry.bval = None
        new_entry.ival = None
        new_entry.fval = None
        new_entry.dval = None
    elif isinstance(value, datetime.datetime):
        # The current timezone is taken from the settings file of django.
        if is_naive(value):
            value_to_set = make_aware(value, get_current_timezone())
        else:
            value_to_set = value
        new_entry.datatype = 'date'
        # TODO: time-aware and time-naive datetime objects, see
        # https://docs.djangoproject.com/en/dev/topics/i18n/timezones/#naive-and-aware-datetime-objects
        new_entry.dval = value_to_set
        new_entry.tval = ''
        new_entry.bval = None
        new_entry.ival = None
        new_entry.fval = None
    elif isinstance(value, (list, tuple)):
        new_entry.datatype = 'list'
        new_entry.dval = None
        new_entry.tval = ''
        new_entry.bval = None
        # ival records the declared length, used at deserialization time.
        new_entry.ival = len(value)
        new_entry.fval = None

        for i, subv in enumerate(value):
            # I do not need get_or_create here, because
            # above I deleted all children (and I
            # expect no concurrency)
            # NOTE: I do not pass other_attribs
            list_to_return.extend(
                cls.create_value(key=('{}{}{:d}'.format(key, cls._sep, i)),
                                 value=subv,
                                 subspecifier_value=subspecifier_value))
    elif isinstance(value, dict):
        new_entry.datatype = 'dict'
        new_entry.dval = None
        new_entry.tval = ''
        new_entry.bval = None
        new_entry.ival = len(value)
        new_entry.fval = None

        for subk, subv in value.items():
            cls.validate_key(subk)
            # I do not need get_or_create here, because
            # above I deleted all children (and I
            # expect no concurrency)
            # NOTE: I do not pass other_attribs
            list_to_return.extend(
                cls.create_value(key='{}{}{}'.format(key, cls._sep, subk),
                                 value=subv,
                                 subspecifier_value=subspecifier_value))
    else:
        # Fallback: anything json-serializable is stored as a json blob.
        try:
            jsondata = json.dumps(value)
        except TypeError:
            raise ValueError(
                'Unable to store the value: it must be either a basic datatype, or json-serializable: {}'
                .format(value))

        new_entry.datatype = 'json'
        new_entry.tval = jsondata
        new_entry.bval = None
        new_entry.ival = None
        new_entry.fval = None

    return list_to_return
def _deserialize_attribute(mainitem, subitems, sep, original_class=None, original_pk=None, lesserrors=False):
    """Deserialize a single attribute.

    :param mainitem: the main item (either the attribute itself for base
        types (None, string, ...) or the main item for lists and dicts.
        Must contain the 'key' key and also the following keys:
        datatype, tval, fval, ival, bval, dval.
        NOTE that a type check is not performed! tval is expected to be a
        string, dval a date, etc.
    :param subitems: must be a dictionary of dictionaries. In the top-level
        dictionary, the key must be the key of the attribute, stripped of
        all prefixes (i.e., if the mainitem has key 'a.b' and we pass
        subitems 'a.b.0', 'a.b.1', 'a.b.1.c', their keys must be '0', '1',
        '1.c'). It must be None if the value is not iterable (int, str,
        float, ...). It is an empty dictionary if there are no subitems.
    :param sep: a string, the separator between subfields (to separate the
        name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass, pass here the class (note: the
        class, not the instance!). This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message (if the class has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass of
        DbMultipleValueAttributeBaseClass that has a dbnode associated to
        it, pass here the PK integer. This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message
    :param lesserrors: If set to True, in some cases where the content of
        the DB is not consistent but data is still recoverable, it will
        just log the message rather than raising an exception (e.g. if the
        number of elements of a dictionary is different from the number
        declared in the ival field).

    :return: the deserialized value
    :raise aiida.backends.djsite.db.migrations.DeserializationException:
        if an error occurs
    """
    from aiida.common import json
    from aiida.common.timezone import (is_naive, make_aware, get_current_timezone)

    if mainitem['datatype'] in ['none', 'bool', 'int', 'float', 'txt']:
        if subitems:
            # BUGFIX: ``mainitem`` is a dict, so use item access; the
            # original ``mainitem.key`` raised AttributeError instead of
            # producing this message.
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem['key']))
        return _deserialize_basic_type(mainitem)

    if mainitem['datatype'] == 'date':
        if subitems:
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem['key']))
        # Stored dates may be naive; attach the current timezone if so.
        if is_naive(mainitem['dval']):
            return make_aware(mainitem['dval'], get_current_timezone())
        return mainitem['dval']

    if mainitem['datatype'] == 'list':
        return deserialize_list(mainitem, subitems, sep, original_class,
                                original_pk, lesserrors)
    if mainitem['datatype'] == 'dict':
        return deserialize_dict(mainitem, subitems, sep, original_class,
                                original_pk, lesserrors)
    if mainitem['datatype'] == 'json':
        try:
            return json.loads(mainitem['tval'])
        except ValueError:
            raise DeserializationException(
                'Error in the content of the json field')
    else:
        raise DeserializationException(
            "The type field '{}' is not recognized".format(
                mainitem['datatype']))
def _deserialize_attribute(mainitem, subitems, sep, original_class=None, original_pk=None, lesserrors=False):
    """Deserialize a single attribute.

    :param mainitem: the main item (either the attribute itself for base
        types (None, string, ...) or the main item for lists and dicts.
        Must contain the 'key' key and also the following keys:
        datatype, tval, fval, ival, bval, dval.
        NOTE that a type check is not performed! tval is expected to be a
        string, dval a date, etc.
    :param subitems: must be a dictionary of dictionaries. In the top-level
        dictionary, the key must be the key of the attribute, stripped of
        all prefixes (i.e., if the mainitem has key 'a.b' and we pass
        subitems 'a.b.0', 'a.b.1', 'a.b.1.c', their keys must be '0', '1',
        '1.c'). It must be None if the value is not iterable (int, str,
        float, ...). It is an empty dictionary if there are no subitems.
    :param sep: a string, the separator between subfields (to separate the
        name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass, pass here the class (note: the
        class, not the instance!). This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message (if the class has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass of
        DbMultipleValueAttributeBaseClass that has a dbnode associated to
        it, pass here the PK integer. This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message
    :param lesserrors: If set to True, in some cases where the content of
        the DB is not consistent but data is still recoverable, it will
        just log the message rather than raising an exception (e.g. if the
        number of elements of a dictionary is different from the number
        declared in the ival field).

    :return: the deserialized value
    :raise aiida.backends.djsite.db.migrations.DeserializationException:
        if an error occurs
    """
    from aiida.common import json
    from aiida.common.timezone import (is_naive, make_aware, get_current_timezone)
    from aiida.common import AIIDA_LOGGER

    def _error_context():
        # Build the (subspecifier, source) strings used in consistency
        # error messages; factored out of three duplicated sites.
        if (original_class is not None and
                original_class._subspecifier_field_name is not None):
            subspecifier_string = '{}={} and '.format(
                original_class._subspecifier_field_name, original_pk)
        else:
            subspecifier_string = ''
        if original_class is None:
            sourcestr = 'the data passed'
        else:
            sourcestr = original_class.__name__
        return subspecifier_string, sourcestr

    datatype = mainitem['datatype']

    if datatype in ('none', 'bool', 'int', 'float', 'txt', 'date'):
        if subitems:
            # BUGFIX: ``mainitem`` is a dict, so use item access; the
            # original ``mainitem.key`` raised AttributeError instead of
            # producing this message.
            raise DeserializationException("'{}' is of a base type, "
                                           'but has subitems!'.format(
                                               mainitem['key']))
        if datatype == 'none':
            return None
        if datatype == 'bool':
            return mainitem['bval']
        if datatype == 'int':
            return mainitem['ival']
        if datatype == 'float':
            return mainitem['fval']
        if datatype == 'txt':
            return mainitem['tval']
        # datatype == 'date': stored dates may be naive; attach the
        # current timezone if so.
        if is_naive(mainitem['dval']):
            return make_aware(mainitem['dval'], get_current_timezone())
        return mainitem['dval']

    if datatype == 'list':
        # subitems contains all subitems, here I store only those of
        # deepness 1, i.e. if I have subitems '0', '1' and '1.c' I
        # store only '0' and '1'
        firstlevelsubdict = {k: v for k, v in subitems.items() if sep not in k}

        # For checking, I verify the expected values
        expected_set = set(['{:d}'.format(i) for i in range(mainitem['ival'])])
        received_set = set(firstlevelsubdict.keys())

        # Missing expected entries: unrecoverable, always raise.
        if not expected_set.issubset(received_set):
            subspecifier_string, sourcestr = _error_context()
            raise DeserializationException(
                'Wrong list elements stored in {} for '
                "{}key='{}' ({} vs {})".format(sourcestr, subspecifier_string,
                                               mainitem['key'], expected_set,
                                               received_set))

        # If there are more entries than expected, but all expected
        # ones are there, I just issue an error but I do not stop.
        if expected_set != received_set:
            subspecifier_string, sourcestr = _error_context()
            msg = ('Wrong list elements stored in {} for '
                   "{}key='{}' ({} vs {})".format(sourcestr,
                                                  subspecifier_string,
                                                  mainitem['key'],
                                                  expected_set, received_set))
            if lesserrors:
                AIIDA_LOGGER.error(msg)
            else:
                raise DeserializationException(msg)

        # I get the values in memory as a dictionary
        tempdict = {}
        for firstsubk, firstsubv in firstlevelsubdict.items():
            # I call recursively the same function to get subitems
            newsubitems = {
                k[len(firstsubk) + len(sep):]: v
                for k, v in subitems.items() if k.startswith(firstsubk + sep)
            }
            tempdict[firstsubk] = _deserialize_attribute(
                mainitem=firstsubv, subitems=newsubitems, sep=sep,
                original_class=original_class, original_pk=original_pk)

        # And then I put them in a list
        retlist = [tempdict['{:d}'.format(i)] for i in range(mainitem['ival'])]
        return retlist

    if datatype == 'dict':
        # subitems contains all subitems, here I store only those of
        # deepness 1, i.e. if I have subitems '0', '1' and '1.c' I
        # store only '0' and '1'
        firstlevelsubdict = {k: v for k, v in subitems.items() if sep not in k}

        if len(firstlevelsubdict) != mainitem['ival']:
            subspecifier_string, sourcestr = _error_context()
            msg = ('Wrong dict length stored in {} for '
                   "{}key='{}' ({} vs {})".format(sourcestr,
                                                  subspecifier_string,
                                                  mainitem['key'],
                                                  len(firstlevelsubdict),
                                                  mainitem['ival']))
            if lesserrors:
                AIIDA_LOGGER.error(msg)
            else:
                raise DeserializationException(msg)

        # I get the values in memory as a dictionary
        tempdict = {}
        for firstsubk, firstsubv in firstlevelsubdict.items():
            # I call recursively the same function to get subitems
            newsubitems = {
                k[len(firstsubk) + len(sep):]: v
                for k, v in subitems.items() if k.startswith(firstsubk + sep)
            }
            tempdict[firstsubk] = _deserialize_attribute(
                mainitem=firstsubv, subitems=newsubitems, sep=sep,
                original_class=original_class, original_pk=original_pk)

        return tempdict

    if datatype == 'json':
        try:
            return json.loads(mainitem['tval'])
        except ValueError:
            raise DeserializationException(
                'Error in the content of the json field')

    raise DeserializationException(
        "The type field '{}' is not recognized".format(datatype))
# Keep DEBUG = False! Otherwise every query is stored in memory DEBUG = False ADMINS = [] ALLOWED_HOSTS = [] MANAGERS = ADMINS # Language code for this installation. All choices can be found here: # http://www.i18nguy.com/unicode/language-identifiers.html LANGUAGE_CODE = 'en-us' # Local time zone for this installation. Always choose the system timezone. # Note: This causes django to set the 'TZ' environment variable, which is read by tzlocal from then onwards. # See https://docs.djangoproject.com/en/2.2/ref/settings/#std:setting-TIME_ZONE TIME_ZONE = get_current_timezone().zone SITE_ID = 1 # If you set this to False, Django will make some optimizations so as not # to load the internationalization machinery. USE_I18N = False # If you set this to False, Django will not format dates, numbers and # calendars according to the current locale. USE_L10N = False # If you set this to False, Django will not use timezone-aware datetimes. # For AiiDA, leave it as True, otherwise setting properties with dates will not work. USE_TZ = settings.USE_TZ
def _read_backup_info_from_dict(self, backup_variables):  # pylint: disable=too-many-branches,too-many-statements
    """Read the backup information from the given dictionary and set the
    needed class variables.

    :param backup_variables: dictionary with the backup configuration keys
        (oldest-object date, backup directory, end date / days-to-backup,
        periodicity, backup-length threshold).
    :raises BackupError: if the configuration is inconsistent or the
        backup directory does not exist.
    :raises ValueError: if one of the timestamps or integers cannot be
        parsed (re-raised after logging).
    """
    # Setting the oldest backup date. This will be used as start of
    # the new backup procedure.
    #
    # If the oldest backup date is not set, then find the oldest
    # creation timestamp and set it as the oldest backup date.
    if backup_variables.get(self.OLDEST_OBJECT_BK_KEY) is None:
        query_node_res = self._query_first_node()

        if not query_node_res:
            self._logger.error('The oldest modification date was not found.')
            raise BackupError('The oldest modification date was not found.')

        # We raised above when the query is empty, so the result is
        # guaranteed non-empty here (the original built a one-element
        # list and took its min, which was dead code).
        self._oldest_object_bk = query_node_res[0].ctime
        self._logger.info(
            'Setting the oldest modification date to the creation date of the oldest object '
            '(%s)', self._oldest_object_bk
        )
    # If the oldest backup date is not None then try to parse it
    else:
        try:
            self._oldest_object_bk = parse(backup_variables.get(self.OLDEST_OBJECT_BK_KEY))
            if self._oldest_object_bk.tzinfo is None:
                # Reuse the timezone object (the original fetched it twice).
                curr_timezone = dtimezone.get_current_timezone()
                self._oldest_object_bk = curr_timezone.localize(self._oldest_object_bk)
                self._logger.info(
                    'No timezone defined in the oldest modification date timestamp. Setting current timezone (%s).',
                    curr_timezone.zone
                )
        # If it is not parsable...
        except ValueError:
            self._logger.error('We did not manage to parse the start timestamp of the last backup.')
            raise

    # Setting the backup directory & normalizing it
    self._backup_dir = os.path.normpath(backup_variables.get(self.BACKUP_DIR_KEY))
    if (not self._ignore_backup_dir_existence_check and not os.path.isdir(self._backup_dir)):
        self._logger.error('The given backup directory does not exist.')
        raise BackupError('The given backup directory does not exist.')

    # You can not set both an end-of-backup date and a number of days
    # after which the backup should stop.
    if (
        backup_variables.get(self.DAYS_TO_BACKUP_KEY) is not None and
        backup_variables.get(self.END_DATE_OF_BACKUP_KEY) is not None
    ):
        self._logger.error('Only one end of backup date can be set.')
        # BUGFIX: the original message was missing the closing parenthesis.
        raise BackupError('Only one backup end can be set (date or days from backup start).')

    # Check if there is an end-of-backup date
    elif backup_variables.get(self.END_DATE_OF_BACKUP_KEY) is not None:
        try:
            self._end_date_of_backup = parse(backup_variables.get(self.END_DATE_OF_BACKUP_KEY))

            if self._end_date_of_backup.tzinfo is None:
                curr_timezone = dtimezone.get_current_timezone()
                self._end_date_of_backup = curr_timezone.localize(self._end_date_of_backup)
                self._logger.info(
                    'No timezone defined in the end date of backup timestamp. Setting current timezone (%s).',
                    curr_timezone.zone
                )

            self._internal_end_date_of_backup = self._end_date_of_backup
        except ValueError:
            self._logger.error('The end date of the backup could not be parsed correctly')
            raise

    # Check if there is defined a days to backup
    elif backup_variables.get(self.DAYS_TO_BACKUP_KEY) is not None:
        try:
            self._days_to_backup = int(backup_variables.get(self.DAYS_TO_BACKUP_KEY))
            self._internal_end_date_of_backup = (
                self._oldest_object_bk + datetime.timedelta(days=self._days_to_backup)
            )
        except ValueError:
            self._logger.error('The days to backup should be an integer')
            raise
    # If the backup end is not set, then the ending date remains open

    # Parse the backup periodicity.
    try:
        self._periodicity = int(backup_variables.get(self.PERIODICITY_KEY))
    except ValueError:
        self._logger.error('The backup _periodicity should be an integer')
        raise

    # Parse the backup length threshold
    try:
        hours_th = int(backup_variables.get(self.BACKUP_LENGTH_THRESHOLD_KEY))
        self._backup_length_threshold = datetime.timedelta(hours=hours_th)
    except ValueError:
        self._logger.error('The backup length threshold should be an integer')
        raise