def desc_files_backup(job_data):
    """
    Creates a desc backup of directories.
    Takes a dictionary with the job data as input.
    """

    try:
        job_name = job_data['job']
        backup_type = job_data['type']
        tmp_dir = job_data['tmp_dir']
        sources = job_data['sources']
        storages = job_data['storages']
    except KeyError as e:
        log_and_mail.writelog('ERROR', f"Missing required key: '{e}'!",
                              config.filelog_fd, job_name)
        return 1

    full_path_tmp_dir = general_function.get_tmp_dir(tmp_dir, backup_type)

    for i in range(len(sources)):
        exclude_list = sources[i].get('excludes', '')
        try:
            target_list = sources[i]['target']
            gzip = sources[i]['gzip']
        except KeyError as e:
            log_and_mail.writelog('ERROR', f"Missing required key: '{e}'!",
                                  config.filelog_fd, job_name)
            continue

        # Keep the exclusion list in a global variable because of how the `filter`
        # argument of the `add` method of the `tarfile` class works.
        general_files_func.EXCLUDE_FILES = general_files_func.get_exclude_ofs(target_list, exclude_list)

        # The backup name is chosen according to the particular glob pattern
        # from the `target_list` list.
        for regex in target_list:
            target_ofs_list = general_files_func.get_ofs(regex)

            if not target_ofs_list:
                log_and_mail.writelog('ERROR', "No file system objects found that " +
                                      f"match the regular expression '{regex}'!",
                                      config.filelog_fd, job_name)
                continue

            for ofs in target_ofs_list:
                # Create a backup only if the object is not in the exclusion list,
                # so as not to generate empty backups.
                if not general_files_func.is_excluded_ofs(ofs):
                    # Get the name of the backup for this object,
                    # WITHOUT EXTENSION AND DATE.
                    backup_file_name = general_files_func.get_name_files_backup(regex, ofs)

                    # Get the part of the backup storage path for this archive
                    # relative to the backup directory.
                    part_of_dir_path = backup_file_name.replace('___', '/')

                    backup_full_tmp_path = general_function.get_full_path(
                        full_path_tmp_dir, backup_file_name, 'tar', gzip)

                    periodic_backup.remove_old_local_file(storages, part_of_dir_path, job_name)

                    if general_files_func.create_tar('files', backup_full_tmp_path, ofs,
                                                     gzip, backup_type, job_name):
                        # The dump was collected in the temporary directory successfully,
                        # so transfer the data to the specified storages.
                        periodic_backup.general_desc_iteration(backup_full_tmp_path, storages,
                                                               part_of_dir_path, job_name)
                    else:
                        continue

    # After all the manipulations, delete the created temporary directory and the
    # data inside the davfs cache directory, but not the cache directory itself!
    general_function.del_file_objects(backup_type, full_path_tmp_dir, '/var/cache/davfs2/*')
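
# A minimal, illustrative example of the `job_data` dictionary this function expects.
# The key names come from the reads above; the concrete values (job name, paths,
# patterns, backup type) are assumptions used purely for illustration.
EXAMPLE_DESC_FILES_JOB = {
    'job': 'site-files',                            # assumed job name
    'type': 'desc_files',                           # consumed as `backup_type`
    'tmp_dir': '/var/tmp/backup',                   # assumed temporary directory
    'sources': [
        {
            'target': ['/var/www/*/data'],          # glob patterns resolved by get_ofs()
            'gzip': True,                           # compress the resulting tar
            'excludes': ['/var/www/*/data/cache'],  # optional exclusion patterns
        },
    ],
    'storages': [],                                 # storage descriptions handled by periodic_backup
}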
def desc_files_backup(job_data): """ Function, creates a desc backup of directories. At the entrance receives a dictionary with the data of the job. """ is_prams_read, job_name, backup_type, tmp_dir, sources, storages, safety_backup, deferred_copying_level = \ general_function.get_job_parameters(job_data) if not is_prams_read: return full_path_tmp_dir = general_function.get_tmp_dir(tmp_dir, backup_type) dumped_ofs = {} for i in range(len(sources)): exclude_list = sources[i].get('excludes', '') try: target_list = sources[i]['target'] gzip = sources[i]['gzip'] except KeyError as e: log_and_mail.writelog('ERROR', f"Missing required key:'{e}'!", config.filelog_fd, job_name) continue # Keeping an exception list in the global variable due to the specificity of # the `filter` key of the `add` method of the `tarfile` class general_files_func.EXCLUDE_FILES = general_files_func.get_exclude_ofs( target_list, exclude_list) # The backup name is selected depending on the particular glob patterns from # the list `target_list` for regex in target_list: target_ofs_list = general_files_func.get_ofs(regex) if not target_ofs_list: log_and_mail.writelog( 'ERROR', "No file system objects found that" + f"match the regular expression '{regex}'!", config.filelog_fd, job_name) continue for ofs in target_ofs_list: # Create a backup only if the directory is not in the exception list # so as not to generate empty backups if not general_files_func.is_excluded_ofs(ofs): # A function that by regularity returns the name of # the backup WITHOUT EXTENSION AND DATE backup_file_name = general_files_func.get_name_files_backup( regex, ofs) # Get the part of the backup storage path for this archive relative to # the backup dir part_of_dir_path = backup_file_name.replace('___', '/') backup_full_tmp_path = general_function.get_full_path( full_path_tmp_dir, backup_file_name, 'tar', gzip) periodic_backup.remove_old_local_file( storages, part_of_dir_path, job_name) if general_files_func.create_tar('files', backup_full_tmp_path, ofs, gzip, backup_type, job_name): dumped_ofs[ofs] = { 'success': True, 'tmp_path': backup_full_tmp_path, 'part_of_dir_path': part_of_dir_path } else: dumped_ofs[ofs] = {'success': False} if deferred_copying_level <= 0 and dumped_ofs[ofs][ 'success']: periodic_backup.general_desc_iteration( backup_full_tmp_path, storages, part_of_dir_path, job_name, safety_backup) else: continue for ofs, result in dumped_ofs.items(): if deferred_copying_level == 1 and result['success']: periodic_backup.general_desc_iteration( result['tmp_path'], storages, result['part_of_dir_path'], job_name, safety_backup) for ofs, result in dumped_ofs.items(): if deferred_copying_level == 2 and result['success']: periodic_backup.general_desc_iteration( result['tmp_path'], storages, result['part_of_dir_path'], job_name, safety_backup) for ofs, result in dumped_ofs.items(): if deferred_copying_level >= 3 and result['success']: periodic_backup.general_desc_iteration(result['tmp_path'], storages, result['part_of_dir_path'], job_name, safety_backup) # After all the manipulations, delete the created temporary directory and # data inside the directory with cache davfs, but not the directory itself! general_function.del_file_objects(backup_type, full_path_tmp_dir, '/var/cache/davfs2/*')
def create_inc_file(local_dst_dirname, remote_dir, part_of_dir_path, backup_file_name,
                    target, exclude_list, gzip, job_name, storage, host, share):
    """
    Determines whether to collect a full or an incremental backup and
    prepares all the necessary information.
    """

    date_year = general_function.get_time_now('year')
    date_month = general_function.get_time_now('moy')
    date_day = general_function.get_time_now('dom')

    if int(date_day) < 11:
        daily_prefix = 'day_01'
    elif int(date_day) < 21:
        daily_prefix = 'day_11'
    else:
        daily_prefix = 'day_21'

    year_dir = os.path.join(local_dst_dirname, part_of_dir_path, date_year)
    initial_dir = os.path.join(year_dir, 'year')  # Path to the full backup
    month_dir = os.path.join(year_dir, 'month_%s' % date_month, 'monthly')
    daily_dir = os.path.join(year_dir, 'month_%s' % date_month, 'daily', daily_prefix)

    year_inc_file = os.path.join(initial_dir, 'year.inc')
    month_inc_file = os.path.join(month_dir, 'month.inc')
    daily_inc_file = os.path.join(daily_dir, 'daily.inc')

    link_dict = {}  # dict for symlinks with pairs like dst: src
    copy_dict = {}  # dict for copies with pairs like dst: src

    # Before collecting a new copy, delete the copies for the same month of last
    # year so as not to keep extra archives.
    old_year = int(date_year) - 1
    old_year_dir = os.path.join(local_dst_dirname, part_of_dir_path, str(old_year))
    if os.path.isdir(old_year_dir):
        old_month_dir = os.path.join(old_year_dir, 'month_%s' % date_month)
        del_old_inc_file(old_year_dir, old_month_dir)

    if not os.path.isfile(year_inc_file):
        # There is no initial index file, so check whether the year directory exists.
        if os.path.isdir(year_dir):
            # The directory exists but the index file does not, so something went wrong.
            # Delete the directory with all the data inside, because without the index
            # it is impossible to keep collecting incremental copies from it.
            general_function.del_file_objects(job_name, year_dir)
            dirs_for_log = general_function.get_dirs_for_log(year_dir, remote_dir, storage)
            file_for_log = os.path.join(dirs_for_log, os.path.basename(year_inc_file))
            log_and_mail.writelog('ERROR',
                                  "The file %s was not found, so the directory %s was cleared. "
                                  "Incremental backup will be reinitialized." % (file_for_log, dirs_for_log),
                                  config.filelog_fd, job_name)

        # Initialize the incremental backup, i.e. collect a full copy.
        dirs_for_log = general_function.get_dirs_for_log(initial_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name, dirs_pairs={initial_dir: dirs_for_log})

        # Get the current list of files and write it to the year inc file.
        meta_info = get_index(target, exclude_list)
        with open(year_inc_file, "w") as index_file:
            json.dump(meta_info, index_file)

        full_backup_path = general_function.get_full_path(initial_dir, backup_file_name, 'tar', gzip)

        general_files_func.create_tar('files', full_backup_path, target, gzip,
                                      'inc_files', job_name, remote_dir, storage, host, share)

        # After creating the full copy, make symlinks to the inc file and the freshly
        # collected copy in the month directory of the current month as well as in the
        # decade directory if the storage is local or scp, and copy the inc file for
        # other storage types that do not support symlinks.
        month_dirs_for_log = general_function.get_dirs_for_log(month_dir, remote_dir, storage)
        daily_dirs_for_log = general_function.get_dirs_for_log(daily_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={month_dir: month_dirs_for_log,
                                                 daily_dir: daily_dirs_for_log})

        if storage in ('local', 'scp'):
            link_dict[month_inc_file] = year_inc_file
            link_dict[os.path.join(month_dir, os.path.basename(full_backup_path))] = full_backup_path
            link_dict[daily_inc_file] = year_inc_file
            link_dict[os.path.join(daily_dir, os.path.basename(full_backup_path))] = full_backup_path
        else:
            copy_dict[month_inc_file] = year_inc_file
            copy_dict[daily_inc_file] = year_inc_file
    else:
        symlink_dir = ''
        if int(date_day) == 1:
            # Collect the monthly incremental backup relative to the year copy.
            old_meta_info = specific_function.parser_json(year_inc_file)
            new_meta_info = get_index(target, exclude_list)

            general_inc_backup_dir = month_dir

            # Also make symlinks for the inc files and backups in the first decade directory.
            symlink_dir = daily_dir

            general_dirs_for_log = general_function.get_dirs_for_log(general_inc_backup_dir, remote_dir, storage)
            symlink_dirs_for_log = general_function.get_dirs_for_log(symlink_dir, remote_dir, storage)
            general_function.create_dirs(job_name=job_name,
                                         dirs_pairs={general_inc_backup_dir: general_dirs_for_log,
                                                     symlink_dir: symlink_dirs_for_log})

            with open(month_inc_file, "w") as index_file:
                json.dump(new_meta_info, index_file)
        elif int(date_day) == 11 or int(date_day) == 21:
            # Collect a ten-day (decade) incremental backup relative to the monthly copy.
            try:
                old_meta_info = specific_function.parser_json(month_inc_file)
            except general_function.MyError as e:
                log_and_mail.writelog('ERROR',
                                      "Couldn't open old month meta info file '%s': %s!" % (month_inc_file, e),
                                      config.filelog_fd, job_name)
                return 2

            new_meta_info = get_index(target, exclude_list)

            general_inc_backup_dir = daily_dir

            general_dirs_for_log = general_function.get_dirs_for_log(general_inc_backup_dir, remote_dir, storage)
            general_function.create_dirs(job_name=job_name,
                                         dirs_pairs={general_inc_backup_dir: general_dirs_for_log})

            with open(daily_inc_file, "w") as index_file:
                json.dump(new_meta_info, index_file)
        else:
            # Collect a normal daily incremental backup relative to the ten-day copy.
            try:
                old_meta_info = specific_function.parser_json(daily_inc_file)
            except general_function.MyError as e:
                log_and_mail.writelog('ERROR',
                                      "Couldn't open old decade meta info file '%s': %s!" % (daily_inc_file, e),
                                      config.filelog_fd, job_name)
                return 2

            new_meta_info = get_index(target, exclude_list)

            general_inc_backup_dir = daily_dir

            general_dirs_for_log = general_function.get_dirs_for_log(general_inc_backup_dir, remote_dir, storage)
            general_function.create_dirs(job_name=job_name,
                                         dirs_pairs={general_inc_backup_dir: general_dirs_for_log})

        # Calculate the difference between the old and new file states.
        diff_json = compute_diff(new_meta_info, old_meta_info)

        inc_backup_path = general_function.get_full_path(general_inc_backup_dir, backup_file_name, 'tar', gzip)

        # Define the list of files that need to be included in the archive.
        target_change_list = diff_json['modify']

        # Form the GNU.dumpdir headers.
        dict_directory = {}  # Dict to store pairs like dir: GNU.dumpdir
        excludes = r'|'.join([fnmatch.translate(x)[:-7] for x in general_files_func.EXCLUDE_FILES]) or r'$.'

        for dir_name, dirs, files in os.walk(target):
            first_level_files = []

            if re.match(excludes, dir_name):
                continue

            for file in files:
                if re.match(excludes, os.path.join(dir_name, file)):
                    continue
                first_level_files.append(file)

            first_level_subdirs = dirs
            dict_directory[dir_name] = get_gnu_dumpdir_format(diff_json, dir_name, target, excludes,
                                                              first_level_subdirs, first_level_files)

        create_inc_tar(inc_backup_path, remote_dir, dict_directory, target_change_list,
                       gzip, job_name, storage, host, share)

        if symlink_dir:
            if storage in ('local', 'scp'):
                link_dict[daily_inc_file] = month_inc_file
            else:
                copy_dict[daily_inc_file] = month_inc_file

    if link_dict:
        for key in link_dict.keys():
            src = link_dict[key]
            dst = key
            try:
                general_function.create_symlink(src, dst)
            except general_function.MyError as err:
                log_and_mail.writelog('ERROR',
                                      "Can't create symlink %s -> %s: %s" % (src, dst, err),
                                      config.filelog_fd, job_name)

    if copy_dict:
        for key in copy_dict.keys():
            src = copy_dict[key]
            dst = key
            try:
                general_function.copy_ofs(src, dst)
            except general_function.MyError as err:
                log_and_mail.writelog('ERROR',
                                      "Can't copy %s -> %s: %s" % (src, dst, err),
                                      config.filelog_fd, job_name)
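
# For reference, `dict_directory` above maps each directory to a GNU.dumpdir record,
# the per-directory snapshot payload that GNU tar stores in incremental archives.
# The sketch below only illustrates the documented encoding (one control character
# per entry: 'Y' = stored in this archive, 'N' = present but unchanged,
# 'D' = subdirectory; each entry NUL-terminated, the record closed by an extra NUL).
# It is not the project's get_gnu_dumpdir_format implementation, and the helper
# name is hypothetical.
def build_example_gnu_dumpdir(included_files, unchanged_files, subdirs):
    entries = (
        ['Y' + name for name in included_files]
        + ['N' + name for name in unchanged_files]
        + ['D' + name for name in subdirs]
    )
    # Each entry is terminated by NUL; the whole record ends with an extra NUL.
    return ''.join(entry + '\0' for entry in entries) + '\0'

# Example: build_example_gnu_dumpdir(['changed.txt'], ['stable.txt'], ['logs'])
# returns 'Ychanged.txt\x00Nstable.txt\x00Dlogs\x00\x00'.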
def create_inc_backup(local_dst_dirname, remote_dir, part_of_dir_path, backup_file_name,
                      target, exclude_list, gzip, job_name, storage, host, share, months_to_store):
    """
    Determines whether to collect a full or an incremental backup and
    prepares all the necessary information.
    """

    date_year = general_function.get_time_now('year')
    date_month = general_function.get_time_now('moy')
    date_day = general_function.get_time_now('dom')

    dated_paths = get_dated_paths(local_dst_dirname, part_of_dir_path, date_year, date_month, date_day)

    # Before collecting a new copy, delete the month directories that fall outside
    # the retention window so as not to keep extra archives.
    old_month_dirs = []
    if os.path.isdir(dated_paths['old_year_dir']) or months_to_store < 12:
        if months_to_store < 12:
            int_date_month = int(date_month)
            last_month = int_date_month - months_to_store
            if last_month <= 0:
                m_range = list(range(last_month + 12, 13))
                m_range.extend(list(range(1, int_date_month)))
            else:
                m_range = list(range(last_month, int_date_month))
            for i in range(1, 13):
                if i not in m_range:
                    date = str(i).zfill(2)
                    if i < int(date_month):
                        year_to_cleanup = dated_paths['year_dir']
                    else:
                        year_to_cleanup = dated_paths['old_year_dir']
                    old_month_dirs.append(os.path.join(year_to_cleanup, f'month_{date}'))
        else:
            old_month_dirs.append(os.path.join(dated_paths['old_year_dir'], f'month_{date_month}'))
        del_old_inc_file(dated_paths['old_year_dir'], old_month_dirs)

    link_dict = {}  # dict for symlinks with pairs like dst: src
    copy_dict = {}  # dict for copies with pairs like dst: src

    # Get the current list of files.
    new_meta_info = get_index(target, exclude_list)

    if not os.path.isfile(dated_paths['year_inc_file']):
        # There is no initial index file, so check whether the year directory exists.
        if os.path.isdir(dated_paths['year_dir']):
            # The directory exists but the index file does not, so something went wrong.
            # Delete the directory with all the data inside, because without the index
            # it is impossible to keep collecting incremental copies from it.
            general_function.del_file_objects(job_name, dated_paths['year_dir'])
            dirs_for_log = general_function.get_dirs_for_log(dated_paths['year_dir'], remote_dir, storage)
            file_for_log = os.path.join(dirs_for_log, os.path.basename(dated_paths['year_inc_file']))
            log_and_mail.writelog('ERROR',
                                  f"The file {file_for_log} was not found, so the directory {dirs_for_log} "
                                  f"was cleared. Incremental backup will be reinitialized.",
                                  config.filelog_fd, job_name)

        # Initialize the incremental backup, i.e. collect a full copy.
        remote_dir_for_logs = general_function.get_dirs_for_log(dated_paths['initial_dir'], remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={dated_paths['initial_dir']: remote_dir_for_logs})

        write_meta_info(dated_paths['year_inc_file'], new_meta_info)

        full_backup_path = general_function.get_full_path(dated_paths['initial_dir'], backup_file_name, 'tar', gzip)

        general_files_func.create_tar('files', full_backup_path, target, gzip,
                                      'inc_files', job_name, remote_dir, storage, host, share)

        daily_dirs_remote = general_function.get_dirs_for_log(dated_paths['daily_dir'], remote_dir, storage)
        month_dirs_remote = general_function.get_dirs_for_log(dated_paths['month_dir'], remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={dated_paths['daily_dir']: daily_dirs_remote,
                                                 dated_paths['month_dir']: month_dirs_remote})

        if storage == 'local':
            link_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            link_dict[os.path.join(dated_paths['month_dir'], os.path.basename(full_backup_path))] = full_backup_path
            link_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            link_dict[os.path.join(dated_paths['daily_dir'], os.path.basename(full_backup_path))] = full_backup_path
        elif storage in ('scp', 'nfs'):
            copy_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            link_dict[os.path.join(dated_paths['month_dir'], os.path.basename(full_backup_path))] = \
                full_backup_path.replace(local_dst_dirname, remote_dir)
            copy_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            link_dict[os.path.join(dated_paths['daily_dir'], os.path.basename(full_backup_path))] = \
                full_backup_path.replace(local_dst_dirname, remote_dir)
        else:
            copy_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            copy_dict[os.path.join(dated_paths['month_dir'], os.path.basename(full_backup_path))] = full_backup_path
            copy_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            copy_dict[os.path.join(dated_paths['daily_dir'], os.path.basename(full_backup_path))] = full_backup_path
    else:
        symlink_dir = ''
        meta_path = ''
        if int(date_day) == 1:
            # Collect the monthly incremental backup relative to the year copy.
            meta_path = dated_paths['month_inc_file']
            old_meta_path = dated_paths['year_inc_file']
            general_inc_backup_dir = dated_paths['month_dir']
            # Also make symlinks for the inc files and backups in the first decade directory.
            symlink_dir = dated_paths['daily_dir']
        elif int(date_day) == 11 or int(date_day) == 21:
            # Collect a ten-day (decade) incremental backup relative to the monthly copy.
            meta_path = dated_paths['daily_inc_file']
            old_meta_path = dated_paths['month_inc_file']
            general_inc_backup_dir = dated_paths['daily_dir']
        else:
            # Collect a normal daily incremental backup relative to the ten-day copy.
            old_meta_path = dated_paths['daily_inc_file']
            general_inc_backup_dir = dated_paths['daily_dir']

        try:
            old_meta_info = specific_function.parser_json(old_meta_path)
        except general_function.MyError as e:
            log_and_mail.writelog('ERROR',
                                  f"Couldn't open old meta info file '{old_meta_path}': {e}!",
                                  config.filelog_fd, job_name)
            return 2

        general_dirs_for_log = general_function.get_dirs_for_log(general_inc_backup_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={general_inc_backup_dir: general_dirs_for_log})

        if meta_path:
            write_meta_info(meta_path, new_meta_info)

        # Calculate the difference between the old and new file states.
        diff_json = compute_diff(new_meta_info, old_meta_info)

        # Define the list of files that need to be included in the archive.
        target_change_list = diff_json['modify']

        # Form the GNU.dumpdir headers.
        dict_directory = get_dict_directory(target, diff_json)

        inc_backup_path = general_function.get_full_path(general_inc_backup_dir, backup_file_name, 'tar', gzip)

        create_inc_tar(inc_backup_path, remote_dir, dict_directory, target_change_list,
                       gzip, job_name, storage, host, share)

        if symlink_dir:
            symlink_dirs_for_log = general_function.get_dirs_for_log(symlink_dir, remote_dir, storage)
            general_function.create_dirs(job_name=job_name, dirs_pairs={symlink_dir: symlink_dirs_for_log})
            if storage == 'local':
                link_dict[dated_paths['daily_inc_file']] = dated_paths['month_inc_file']
            elif storage in ('scp', 'nfs'):
                copy_dict[dated_paths['daily_inc_file'].replace(local_dst_dirname, remote_dir)] = \
                    dated_paths['month_inc_file'].replace(local_dst_dirname, remote_dir)
            else:
                copy_dict[dated_paths['daily_inc_file']] = dated_paths['month_inc_file']

    create_links_and_copies(link_dict, copy_dict, job_name)
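
# When months_to_store is less than 12, create_inc_backup computes which month
# directories fall outside the retention window. The standalone helper below
# re-derives that window computation for clarity; the function name and return
# value are illustrative only and are not part of the project's API.
def example_months_outside_window(current_month, months_to_store):
    """Return the month numbers (1-12) whose copies fall outside the retention
    window that ends just before `current_month`."""
    if months_to_store >= 12:
        return []
    last_month = current_month - months_to_store
    if last_month <= 0:
        kept = list(range(last_month + 12, 13)) + list(range(1, current_month))
    else:
        kept = list(range(last_month, current_month))
    return [month for month in range(1, 13) if month not in kept]

# Example: with current_month=3 and months_to_store=2 the kept window is [1, 2],
# so months 3..12 are reported for cleanup; in create_inc_backup, months greater
# than or equal to the current month map to last year's directory, earlier ones
# to this year's.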