Exemple #1
0
def desc_files_backup(job_data):
    """Create a desc backup of directories.

    Receives a dictionary with the data of the job. The keys `job`, `type`,
    `tmp_dir`, `sources` and `storages` are required; every element of
    `sources` must provide `target` and `gzip` and may provide `excludes`.

    For every file system object matched by a target pattern (and not
    excluded) a tar archive is created in the temporary directory and then
    handed over to `periodic_backup.general_desc_iteration`.

    Returns 1 if a required job key is missing, otherwise None.
    """
    # Resolved up front so that the error branch below can always reference
    # a job name, even when 'job' itself is the missing key.
    job_name = job_data.get('job', 'undefined')

    try:
        job_name = job_data['job']
        backup_type = job_data['type']
        tmp_dir = job_data['tmp_dir']
        sources = job_data['sources']
        storages = job_data['storages']
    except KeyError as e:
        log_and_mail.writelog('ERROR', f"Missing required key:'{e}'!",
                              config.filelog_fd, job_name)
        return 1

    full_path_tmp_dir = general_function.get_tmp_dir(tmp_dir, backup_type)

    for source in sources:
        exclude_list = source.get('excludes', '')
        try:
            target_list = source['target']
            gzip = source['gzip']
        except KeyError as e:
            # A broken source must not prevent the remaining ones from being processed
            log_and_mail.writelog('ERROR', f"Missing required key:'{e}'!",
                                  config.filelog_fd, job_name)
            continue

        # Keeping an exception list in the global variable due to the specificity of
        # the `filter` key of the `add` method of the `tarfile` class
        general_files_func.EXCLUDE_FILES = general_files_func.get_exclude_ofs(target_list,
                                                                              exclude_list)

        # The backup name is selected depending on the particular glob patterns from
        # the list `target_list`
        for regex in target_list:
            target_ofs_list = general_files_func.get_ofs(regex)

            if not target_ofs_list:
                log_and_mail.writelog('ERROR',
                                      "No file system objects found that "
                                      f"match the regular expression '{regex}'!",
                                      config.filelog_fd, job_name)
                continue

            for ofs in target_ofs_list:
                # Create a backup only if the directory is not in the exception list
                # so as not to generate empty backups
                if general_files_func.is_excluded_ofs(ofs):
                    continue

                # A function that by regularity returns the name of
                # the backup WITHOUT EXTENSION AND DATE
                backup_file_name = general_files_func.get_name_files_backup(regex, ofs)
                # Get the part of the backup storage path for this archive relative to
                # the backup dir
                part_of_dir_path = backup_file_name.replace('___', '/')

                backup_full_tmp_path = general_function.get_full_path(full_path_tmp_dir,
                                                                      backup_file_name,
                                                                      'tar',
                                                                      gzip)

                periodic_backup.remove_old_local_file(storages, part_of_dir_path, job_name)

                if general_files_func.create_tar('files', backup_full_tmp_path, ofs,
                                                 gzip, backup_type, job_name):
                    # The archive was collected in the temporary directory, so it
                    # can be transferred to the specified storages
                    periodic_backup.general_desc_iteration(backup_full_tmp_path,
                                                           storages, part_of_dir_path,
                                                           job_name)

    # After all the manipulations, delete the created temporary directory and
    # data inside the directory with cache davfs, but not the directory itself!
    general_function.del_file_objects(backup_type,
                                      full_path_tmp_dir, '/var/cache/davfs2/*')
Exemple #2
0
def desc_files_backup(job_data):
    """Create a desc backup of directories.

    Receives a dictionary with the data of the job; the job parameters are
    extracted by `general_function.get_job_parameters`. Nothing is done if
    the parameters could not be read.

    `deferred_copying_level` controls when a successfully created archive is
    handed over to `periodic_backup.general_desc_iteration`:

    * `<= 0` - right after the archive is created;
    * `1`    - after all objects of one target pattern are archived;
    * `2`    - after all target patterns of one source are archived;
    * `>= 3` - after all sources are archived.

    Every archive is handed over exactly once.
    """
    (is_prams_read, job_name, backup_type, tmp_dir, sources, storages,
     safety_backup, deferred_copying_level) = general_function.get_job_parameters(job_data)
    if not is_prams_read:
        return

    full_path_tmp_dir = general_function.get_tmp_dir(tmp_dir, backup_type)

    # Archives that are created but not yet delivered, as pairs
    # `tmp_path: part_of_dir_path`. Keyed by the archive path so that an
    # archive which is produced twice is still delivered only once.
    pending = {}

    def deliver_pending():
        # Hand over everything collected so far and forget it, so that a later
        # call does not deliver the same archives again
        for tmp_path, dir_part in pending.items():
            periodic_backup.general_desc_iteration(tmp_path, storages, dir_part,
                                                   job_name, safety_backup)
        pending.clear()

    for source in sources:
        exclude_list = source.get('excludes', '')
        try:
            target_list = source['target']
            gzip = source['gzip']
        except KeyError as e:
            log_and_mail.writelog('ERROR', f"Missing required key:'{e}'!",
                                  config.filelog_fd, job_name)
            continue

        # Keeping an exception list in the global variable due to the specificity of
        # the `filter` key of the `add` method of the `tarfile` class
        general_files_func.EXCLUDE_FILES = general_files_func.get_exclude_ofs(
            target_list, exclude_list)

        # The backup name is selected depending on the particular glob patterns from
        # the list `target_list`
        for regex in target_list:
            target_ofs_list = general_files_func.get_ofs(regex)

            if not target_ofs_list:
                log_and_mail.writelog(
                    'ERROR', "No file system objects found that "
                    f"match the regular expression '{regex}'!",
                    config.filelog_fd, job_name)
                continue

            for ofs in target_ofs_list:
                # Create a backup only if the directory is not in the exception list
                # so as not to generate empty backups
                if general_files_func.is_excluded_ofs(ofs):
                    continue

                # A function that by regularity returns the name of
                # the backup WITHOUT EXTENSION AND DATE
                backup_file_name = general_files_func.get_name_files_backup(
                    regex, ofs)
                # Get the part of the backup storage path for this archive relative to
                # the backup dir
                part_of_dir_path = backup_file_name.replace('___', '/')

                backup_full_tmp_path = general_function.get_full_path(
                    full_path_tmp_dir, backup_file_name, 'tar', gzip)

                periodic_backup.remove_old_local_file(
                    storages, part_of_dir_path, job_name)

                is_created = general_files_func.create_tar(
                    'files', backup_full_tmp_path, ofs, gzip, backup_type,
                    job_name)

                if not is_created:
                    # A failed attempt invalidates an archive with the same
                    # path that may still be waiting for delivery
                    pending.pop(backup_full_tmp_path, None)
                elif deferred_copying_level <= 0:
                    periodic_backup.general_desc_iteration(
                        backup_full_tmp_path, storages, part_of_dir_path,
                        job_name, safety_backup)
                else:
                    pending[backup_full_tmp_path] = part_of_dir_path

            if deferred_copying_level == 1:
                deliver_pending()

        if deferred_copying_level == 2:
            deliver_pending()

    if deferred_copying_level >= 3:
        deliver_pending()

    # After all the manipulations, delete the created temporary directory and
    # data inside the directory with cache davfs, but not the directory itself!
    general_function.del_file_objects(backup_type, full_path_tmp_dir,
                                      '/var/cache/davfs2/*')
Exemple #3
0
def create_inc_file(local_dst_dirname, remote_dir, part_of_dir_path,
                    backup_file_name, target, exclude_list, gzip, job_name,
                    storage, host, share):
    """Determine whether to collect a full or an incremental backup and do it.

    The backups of one year live under
    `<local_dst_dirname>/<part_of_dir_path>/<year>`:

    * `year/` holds the full copy and its index file `year.inc`;
    * `month_<MM>/monthly/` holds the copy collected on the 1st day of the
      month (relative to the year index) and `month.inc`;
    * `month_<MM>/daily/day_01|day_11|day_21/` hold the copies of the
      respective ten-day period and `daily.inc`.

    If `year.inc` does not exist, a full copy is collected. Otherwise an
    incremental copy is collected relative to the year index (day 1), the
    month index (days 11 and 21) or the index of the current ten-day period
    (any other day).

    Parameters:
        local_dst_dirname, part_of_dir_path: parts of the local destination path.
        remote_dir, storage: passed to `general_function.get_dirs_for_log`
            and to the archive creating functions.
        backup_file_name: archive name passed to `general_function.get_full_path`.
        target: directory to back up.
        exclude_list: passed to `get_index`.
        gzip: passed to `general_function.get_full_path` and to the archive
            creating functions.
        job_name: job name used for logging.
        host, share: passed through to the archive creating functions.

    Returns:
        2 if the previous index file could not be read, otherwise None.
    """
    date_year = general_function.get_time_now('year')
    date_month = general_function.get_time_now('moy')
    date_day = general_function.get_time_now('dom')
    day_of_month = int(date_day)

    if day_of_month < 11:
        daily_prefix = 'day_01'
    elif day_of_month < 21:
        daily_prefix = 'day_11'
    else:
        daily_prefix = 'day_21'

    month_dir_name = 'month_%s' % (date_month)

    year_dir = os.path.join(local_dst_dirname, part_of_dir_path, date_year)
    initial_dir = os.path.join(year_dir, 'year')  # Path to full backup
    month_dir = os.path.join(year_dir, month_dir_name, 'monthly')
    daily_dir = os.path.join(year_dir, month_dir_name, 'daily', daily_prefix)

    year_inc_file = os.path.join(initial_dir, 'year.inc')
    month_inc_file = os.path.join(month_dir, 'month.inc')
    daily_inc_file = os.path.join(daily_dir, 'daily.inc')

    link_dict = {}  # dict for symlink with pairs like dst: src
    copy_dict = {}  # dict for copy with pairs like dst: src

    # Exact membership test: a substring test against 'local, scp' would also
    # accept an empty string or any fragment of that literal
    use_symlinks = storage in ('local', 'scp')

    # Before we proceed to collect a copy, we need to delete the copies for the same month last year
    # if they are to not save extra archives
    old_year = int(date_year) - 1
    old_year_dir = os.path.join(local_dst_dirname, part_of_dir_path,
                                str(old_year))
    if os.path.isdir(old_year_dir):
        old_month_dir = os.path.join(old_year_dir, month_dir_name)
        del_old_inc_file(old_year_dir, old_month_dir)

    if not os.path.isfile(year_inc_file):
        # There is no original index file, so we need to check the existence of an year directory
        if os.path.isdir(year_dir):
            # There is a directory, but there is no file itself, then something went wrong, so
            # we delete this directory with all the data inside, because even if they are there
            # continue to collect incremental copies it will not be able to
            general_function.del_file_objects(job_name, year_dir)
            dirs_for_log = general_function.get_dirs_for_log(
                year_dir, remote_dir, storage)
            file_for_log = os.path.join(dirs_for_log,
                                        os.path.basename(year_inc_file))
            # The message is built as a single literal: applying `%` to only the
            # last part of a `+` concatenation fails with a TypeError
            log_and_mail.writelog(
                'ERROR',
                f"The file {file_for_log} not found, so the directory {dirs_for_log} is cleared. "
                f"Incremental backup will be reinitialized ",
                config.filelog_fd, job_name)

        # Initialize the incremental backup, i.e. collect a full copy
        dirs_for_log = general_function.get_dirs_for_log(
            initial_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={initial_dir: dirs_for_log})

        # Get the current list of files and write to the year inc file
        meta_info = get_index(target, exclude_list)
        with open(year_inc_file, "w") as index_file:
            json.dump(meta_info, index_file)

        full_backup_path = general_function.get_full_path(
            initial_dir, backup_file_name, 'tar', gzip)

        general_files_func.create_tar('files', full_backup_path, target, gzip,
                                      'inc_files', job_name, remote_dir,
                                      storage, host, share)

        # After creating the full copy, make the symlinks for the inc file and
        # the collected copy in the month directory of the current month as
        # well as in the decade directory if the storage is local or scp;
        # copy the inc file for the other storage types, which do not support
        # symlinks.
        month_dirs_for_log = general_function.get_dirs_for_log(
            month_dir, remote_dir, storage)
        daily_dirs_for_log = general_function.get_dirs_for_log(
            daily_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name,
                                     dirs_pairs={
                                         month_dir: month_dirs_for_log,
                                         daily_dir: daily_dirs_for_log
                                     })

        if use_symlinks:
            archive_name = os.path.basename(full_backup_path)
            link_dict[month_inc_file] = year_inc_file
            link_dict[os.path.join(month_dir, archive_name)] = full_backup_path
            link_dict[daily_inc_file] = year_inc_file
            link_dict[os.path.join(daily_dir, archive_name)] = full_backup_path
        else:
            copy_dict[month_inc_file] = year_inc_file
            copy_dict[daily_inc_file] = year_inc_file
    else:
        symlink_dir = ''
        new_inc_file = ''  # index file to (re)write, empty if none

        if day_of_month == 1:
            # It is necessary to collect monthly incremental backup relative to the year copy
            old_inc_label, old_inc_file = 'year', year_inc_file
            new_inc_file = month_inc_file
            general_inc_backup_dir = month_dir
            # It is also necessary to make a symlink for inc files and backups to the directory
            # with the first decade
            symlink_dir = daily_dir
        elif day_of_month in (11, 21):
            # It is necessary to collect a ten-day incremental backup relative to a monthly copy
            old_inc_label, old_inc_file = 'month', month_inc_file
            new_inc_file = daily_inc_file
            general_inc_backup_dir = daily_dir
        else:
            # It is necessary to collect a normal daily incremental backup relative to a ten-day copy
            old_inc_label, old_inc_file = 'decade', daily_inc_file
            general_inc_backup_dir = daily_dir

        # An unreadable previous index is handled the same way for every
        # period, including the year index used on the 1st day of the month
        try:
            old_meta_info = specific_function.parser_json(old_inc_file)
        except general_function.MyError as e:
            log_and_mail.writelog(
                'ERROR',
                "Couldn't open old %s meta info file '%s': %s!" %
                (old_inc_label, old_inc_file, e), config.filelog_fd, job_name)
            return 2

        new_meta_info = get_index(target, exclude_list)

        dirs_pairs = {
            general_inc_backup_dir: general_function.get_dirs_for_log(
                general_inc_backup_dir, remote_dir, storage)
        }
        if symlink_dir:
            dirs_pairs[symlink_dir] = general_function.get_dirs_for_log(
                symlink_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name, dirs_pairs=dirs_pairs)

        if new_inc_file:
            with open(new_inc_file, "w") as index_file:
                json.dump(new_meta_info, index_file)

        # Calculate the difference between the old and new file states
        diff_json = compute_diff(new_meta_info, old_meta_info)

        inc_backup_path = general_function.get_full_path(
            general_inc_backup_dir, backup_file_name, 'tar', gzip)

        # Define the list of files that need to be included in the archive
        target_change_list = diff_json['modify']

        # Form GNU.dumpdir headers
        dict_directory = {}  # Dict to store pairs like dir:GNU.dumpdir

        # NOTE(review): `[:-7]` presumably strips a trailing `\Z(?ms)` from the
        # translated pattern; newer Python versions document a different
        # `fnmatch.translate` output form - confirm against the supported
        # interpreter versions. `$.` can never match and stands for "no excludes".
        excludes = r'|'.join([
            fnmatch.translate(x)[:-7] for x in general_files_func.EXCLUDE_FILES
        ]) or r'$.'

        for dir_name, dirs, files in os.walk(target):
            if re.match(excludes, dir_name):
                continue

            first_level_files = [
                file_name for file_name in files
                if not re.match(excludes, os.path.join(dir_name, file_name))
            ]

            dict_directory[dir_name] = get_gnu_dumpdir_format(
                diff_json, dir_name, target, excludes, dirs,
                first_level_files)

        create_inc_tar(inc_backup_path, remote_dir, dict_directory,
                       target_change_list, gzip, job_name, storage, host,
                       share)

        if symlink_dir:
            if use_symlinks:
                link_dict[daily_inc_file] = month_inc_file
            else:
                copy_dict[daily_inc_file] = month_inc_file

    # A failed symlink or copy is logged but does not interrupt the others
    for dst, src in link_dict.items():
        try:
            general_function.create_symlink(src, dst)
        except general_function.MyError as err:
            log_and_mail.writelog(
                'ERROR',
                "Can't create symlink %s -> %s: %s" % (src, dst, err),
                config.filelog_fd, job_name)

    for dst, src in copy_dict.items():
        try:
            general_function.copy_ofs(src, dst)
        except general_function.MyError as err:
            log_and_mail.writelog(
                'ERROR', "Can't copy %s -> %s: %s" % (src, dst, err),
                config.filelog_fd, job_name)
def create_inc_backup(local_dst_dirname, remote_dir, part_of_dir_path, backup_file_name,
                      target, exclude_list, gzip, job_name, storage, host, share, months_to_store):
    """Determine whether to collect a full or an incremental backup and do it.

    All dated paths are obtained from `get_dated_paths`. If the year index
    file does not exist, a full copy is collected. Otherwise an incremental
    copy is collected relative to the year index (day 1), the month index
    (days 11 and 21) or the daily index (any other day).

    Before that, month directories that fall out of the retention period
    defined by `months_to_store` are passed to `del_old_inc_file`.

    Parameters:
        local_dst_dirname, part_of_dir_path: passed to `get_dated_paths`.
        remote_dir, storage: passed to `general_function.get_dirs_for_log`
            and to the archive creating functions; `storage` also selects
            whether index files and archives are symlinked or copied.
        backup_file_name: archive name passed to `general_function.get_full_path`.
        target: object to back up.
        exclude_list: passed to `get_index`.
        gzip: passed to `general_function.get_full_path` and to the archive
            creating functions.
        job_name: job name used for logging.
        host, share: passed through to the archive creating functions.
        months_to_store: number of months to keep; values of 12 and more keep
            a whole year.

    Returns:
        2 if the previous index file could not be read, otherwise None.
    """
    date_year = general_function.get_time_now('year')
    date_month = general_function.get_time_now('moy')
    date_day = general_function.get_time_now('dom')
    day_of_month = int(date_day)

    dated_paths = get_dated_paths(local_dst_dirname, part_of_dir_path, date_year, date_month, date_day)

    # Exact comparisons: substring tests against 'local' or 'scp, nfs' would
    # also accept an empty string or any fragment of those literals
    is_local_storage = storage == 'local'
    is_scp_or_nfs_storage = storage in ('scp', 'nfs')

    # Before we proceed to collect a copy, we need to delete the copies for the same month last year
    # if they are to not save extra archives
    if os.path.isdir(dated_paths['old_year_dir']) or months_to_store < 12:
        old_month_dirs = _get_expired_month_dirs(dated_paths, date_month, months_to_store)
        del_old_inc_file(dated_paths['old_year_dir'], old_month_dirs)

    link_dict = {}  # dict for symlink with pairs like dst: src
    copy_dict = {}  # dict for copy with pairs like dst: src

    # Get the current list of files
    new_meta_info = get_index(target, exclude_list)

    if not os.path.isfile(dated_paths['year_inc_file']):
        # There is no original index file, so we need to check the existence of an year directory
        if os.path.isdir(dated_paths['year_dir']):
            # There is a directory, but there is no file itself, then something went wrong, so
            # we delete this directory with all the data inside, because even if they are there
            # continue to collect incremental copies it will not be able to
            general_function.del_file_objects(job_name, dated_paths['year_dir'])
            dirs_for_log = general_function.get_dirs_for_log(dated_paths['year_dir'], remote_dir, storage)
            file_for_log = os.path.join(dirs_for_log, os.path.basename(dated_paths['year_inc_file']))
            log_and_mail.writelog('ERROR',
                                  f"The file {file_for_log} not found, so the directory {dirs_for_log} is cleared. "
                                  f"Incremental backup will be reinitialized ",
                                  config.filelog_fd, job_name)

        # Initialize the incremental backup, i.e. collect a full copy
        remote_dir_for_logs = general_function.get_dirs_for_log(dated_paths['initial_dir'], remote_dir, storage)
        general_function.create_dirs(job_name=job_name, dirs_pairs={dated_paths['initial_dir']: remote_dir_for_logs})

        write_meta_info(dated_paths['year_inc_file'], new_meta_info)

        full_backup_path = general_function.get_full_path(dated_paths['initial_dir'],
                                                          backup_file_name,
                                                          'tar',
                                                          gzip)

        general_files_func.create_tar('files', full_backup_path, target,
                                      gzip, 'inc_files', job_name,
                                      remote_dir, storage, host, share)

        daily_dirs_remote = general_function.get_dirs_for_log(dated_paths['daily_dir'], remote_dir, storage)
        month_dirs_remote = general_function.get_dirs_for_log(dated_paths['month_dir'], remote_dir, storage)
        general_function.create_dirs(job_name=job_name, dirs_pairs={dated_paths['daily_dir']: daily_dirs_remote,
                                                                    dated_paths['month_dir']: month_dirs_remote})

        archive_name = os.path.basename(full_backup_path)
        month_archive = os.path.join(dated_paths['month_dir'], archive_name)
        daily_archive = os.path.join(dated_paths['daily_dir'], archive_name)

        if is_local_storage:
            link_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            link_dict[month_archive] = full_backup_path
            link_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            link_dict[daily_archive] = full_backup_path
        elif is_scp_or_nfs_storage:
            # The index files are copied, while the archives are linked to the
            # path of the full copy with the local prefix replaced by `remote_dir`
            remote_full_backup_path = full_backup_path.replace(local_dst_dirname, remote_dir)
            copy_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            link_dict[month_archive] = remote_full_backup_path
            copy_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            link_dict[daily_archive] = remote_full_backup_path
        else:
            copy_dict[dated_paths['month_inc_file']] = dated_paths['year_inc_file']
            copy_dict[month_archive] = full_backup_path
            copy_dict[dated_paths['daily_inc_file']] = dated_paths['year_inc_file']
            copy_dict[daily_archive] = full_backup_path

    else:
        symlink_dir = ''
        meta_path = ''  # index file to (re)write, empty if none
        if day_of_month == 1:
            # Monthly copy relative to the year index
            meta_path = dated_paths['month_inc_file']
            old_meta_path = dated_paths['year_inc_file']
            general_inc_backup_dir = dated_paths['month_dir']
            symlink_dir = dated_paths['daily_dir']
        elif day_of_month in (11, 21):
            # Ten-day copy relative to the month index
            meta_path = dated_paths['daily_inc_file']
            old_meta_path = dated_paths['month_inc_file']
            general_inc_backup_dir = dated_paths['daily_dir']
        else:
            # Ordinary daily copy relative to the daily index
            old_meta_path = dated_paths['daily_inc_file']
            general_inc_backup_dir = dated_paths['daily_dir']

        try:
            old_meta_info = specific_function.parser_json(old_meta_path)
        except general_function.MyError as e:
            log_and_mail.writelog('ERROR',
                                  f"Couldn't open old meta info file '{old_meta_path}': {e}!",
                                  config.filelog_fd, job_name)
            return 2

        general_dirs_for_log = general_function.get_dirs_for_log(general_inc_backup_dir, remote_dir, storage)
        general_function.create_dirs(job_name=job_name, dirs_pairs={general_inc_backup_dir: general_dirs_for_log})
        if meta_path:
            write_meta_info(meta_path, new_meta_info)

        # Calculate the difference between the old and new file states
        diff_json = compute_diff(new_meta_info, old_meta_info)

        # Define the list of files that need to be included in the archive
        target_change_list = diff_json['modify']

        dict_directory = get_dict_directory(target, diff_json)

        inc_backup_path = general_function.get_full_path(general_inc_backup_dir, backup_file_name, 'tar', gzip)
        create_inc_tar(
            inc_backup_path, remote_dir, dict_directory, target_change_list, gzip, job_name, storage, host, share
        )

        if symlink_dir:
            symlink_dirs_for_log = general_function.get_dirs_for_log(symlink_dir, remote_dir, storage)
            general_function.create_dirs(job_name=job_name, dirs_pairs={symlink_dir: symlink_dirs_for_log})
            if is_local_storage:
                link_dict[dated_paths['daily_inc_file']] = dated_paths['month_inc_file']
            elif is_scp_or_nfs_storage:
                copy_dict[dated_paths['daily_inc_file'].replace(local_dst_dirname, remote_dir)] = \
                    dated_paths['month_inc_file'].replace(local_dst_dirname, remote_dir)
            else:
                copy_dict[dated_paths['daily_inc_file']] = dated_paths['month_inc_file']

    create_links_and_copies(link_dict, copy_dict, job_name)


def _get_expired_month_dirs(dated_paths, date_month, months_to_store):
    """Return the month directories that fall out of the retention period."""
    if not months_to_store < 12:
        # A whole year is kept: only the same month of the last year expires
        return [os.path.join(dated_paths['old_year_dir'], f'month_{date_month}')]

    current_month = int(date_month)
    first_kept_month = current_month - months_to_store
    if first_kept_month <= 0:
        # The retention period starts in the last year
        kept_months = set(range(first_kept_month + 12, 13)) | set(range(1, current_month))
    else:
        kept_months = set(range(first_kept_month, current_month))

    expired_dirs = []
    for month in range(1, 13):
        if month in kept_months:
            continue
        # Months before the current one belong to this year, the current and
        # the following ones can only be left over from the last year
        if month < current_month:
            year_to_cleanup = dated_paths['year_dir']
        else:
            year_to_cleanup = dated_paths['old_year_dir']
        expired_dirs.append(os.path.join(year_to_cleanup, f'month_{month:02d}'))
    return expired_dirs