def __init__(self, patching, logger, hutil):
    """Wire up the collaborators and initial state used when freezing mounts.

    Args:
        patching: forwarded to ``Mounts`` and ``ResourceDiskUtil``.
        logger: object whose ``log`` method receives diagnostics.
        hutil: forwarded to ``FreezeHandler``.

    A failure while constructing ``Mounts`` is logged and leaves
    ``self.mounts`` set to ``None`` rather than propagating.
    """
    self.patching = patching
    self.logger = logger
    self.hutil = hutil

    # Plain bookkeeping state; none of it depends on the collaborators below.
    self.frozen_items = set()
    self.unfrozen_items = set()
    self.mount_open_failed = False
    self.skip_freeze = True
    self.isAquireLockSucceeded = True
    self.getLockRetry = 0
    self.maxGetLockRetry = 5

    # Mount discovery is best-effort: log the failure and carry on without it.
    try:
        discovered_mounts = Mounts(patching=self.patching, logger=self.logger)
    except Exception as exc:
        failure_text = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
            str(exc), traceback.format_exc())
        self.logger.log(failure_text, True, 'Warning')
        self.logger.log(str(exc), True)
        discovered_mounts = None
    self.mounts = discovered_mounts

    self.freeze_handler = FreezeHandler(self.logger, self.hutil)

    # The resource disk mount point is looked up once, here, and cached.
    disk_helper = ResourceDiskUtil(patching=patching, logger=logger)
    self.resource_disk_mount_point = disk_helper.get_resource_disk_mount_point()
Example #2
0
    def __init__(self, patching, logger, hutil):
        """Initialise freezer state and choose the safefreeze binary path.

        The machine architecture is read from ``platform.machine()`` and
        ``os.uname()``; when either reports a name starting with ``aarch64``
        or ``arm64`` the ARM64 binary path is selected, otherwise the default
        (x64) path is kept. Architecture detection and mount discovery are
        both best-effort: failures are logged and the defaults remain.

        Args:
            patching: forwarded to ``Mounts`` and ``ResourceDiskUtil``.
            logger: object whose ``log`` method receives diagnostics.
            hutil: forwarded to ``FreezeHandler``.
        """
        self.patching = patching
        self.logger = logger
        self.hutil = hutil
        # Default to the x64 binary; only switched when ARM64 is detected.
        self.safeFreezeFolderPath = "safefreeze/bin/safefreeze"
        self.isArm64Machine = False

        try:
            platformMachine = platform.machine()
            architectureFromUname = os.uname()[-1]
            self.logger.log("platformMachine : " + str(platformMachine) +
                            " architectureFromUname : " +
                            str(architectureFromUname))
            arm64_prefixes = ("aarch64", "arm64")
            # Either source reporting an ARM64 name is sufficient.
            self.isArm64Machine = any(
                name is not None and name.startswith(arm64_prefixes)
                for name in (platformMachine, architectureFromUname))
        except Exception as e:
            errorMsg = "Unable to fetch machine processor architecture, error: %s, stack trace: %s" % (
                str(e), traceback.format_exc())
            # The level is the third argument, matching the other
            # (message, True, level) logger calls in this code.
            self.logger.log(errorMsg, True, 'Error')

        if self.isArm64Machine:
            self.logger.log("isArm64Machine : " + str(self.isArm64Machine) +
                            " Using ARM64 safefreeze binary")
            self.safeFreezeFolderPath = "safefreezeArm64/bin/safefreeze"
        else:
            self.logger.log("isArm64Machine : " + str(self.isArm64Machine) +
                            " Using x64 safefreeze binary")

        # Mount discovery is best-effort: log and continue with mounts=None.
        try:
            self.mounts = Mounts(patching=self.patching, logger=self.logger)
        except Exception as e:
            errMsg = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True, 'Warning')
            self.logger.log(str(e), True)
            self.mounts = None
        self.frozen_items = set()
        self.unfrozen_items = set()
        self.freeze_handler = FreezeHandler(self.logger, self.hutil)
        self.mount_open_failed = False
        resource_disk = ResourceDiskUtil(patching=patching, logger=logger)
        self.resource_disk_mount_point = resource_disk.get_resource_disk_mount_point(
        )
        self.skip_freeze = True
        self.isAquireLockSucceeded = True
        self.getLockRetry = 0
        self.maxGetLockRetry = 5
        self.safeFreezelockFile = None
Example #3
0
    def get_total_used_size(self):
        """Sum the used space reported by ``df -k`` for the devices that count.

        Each row of the ``df`` output is classified (network share, temporary
        disk, RAM disk, loop device, gluster snapshot, or other). Only rows
        that reach the final ``else`` branch contribute to the returned
        total; the per-category sums are logged and sent to telemetry.

        Returns:
            A ``(total_used_bytes, size_calc_failed)`` tuple. ``(0, True)``
            is returned when any exception is raised; ``size_calc_failed`` is
            also True when a ``df`` row cannot be parsed.
        """
        try:
            size_calc_failed = False
            df = subprocess.Popen(["df", "-k"], stdout=subprocess.PIPE)
            # NOTE(review): the sample below shows a "Type" column (seven
            # fields), but the command run above is plain `df -k` and the
            # parsing loop unpacks exactly six fields per row - confirm which
            # one is stale.
            '''
            Sample output of the df command

            Filesystem                                              Type     1K-blocks    Used    Avail Use% Mounted on
            /dev/sda2                                               xfs       52155392 3487652 48667740   7% /
            devtmpfs                                                devtmpfs   7170976       0  7170976   0% /dev
            tmpfs                                                   tmpfs      7180624       0  7180624   0% /dev/shm
            tmpfs                                                   tmpfs      7180624  760496  6420128  11% /run
            tmpfs                                                   tmpfs      7180624       0  7180624   0% /sys/fs/cgroup
            /dev/sda1                                               ext4        245679  151545    76931  67% /boot
            /dev/sdb1                                               ext4      28767204 2142240 25140628   8% /mnt/resource
            /dev/mapper/mygroup-thinv1                              xfs        1041644   33520  1008124   4% /bricks/brick1
            /dev/mapper/mygroup-85197c258a54493da7880206251f5e37_0  xfs        1041644   33520  1008124   4% /run/gluster/snaps/85197c258a54493da7880206251f5e37/brick2
            /dev/mapper/mygroup2-thinv2                             xfs       15717376 5276944 10440432  34% /tmp/test
            /dev/mapper/mygroup2-63a858543baf4e40a3480a38a2f232a0_0 xfs       15717376 5276944 10440432  34% /run/gluster/snaps/63a858543baf4e40a3480a38a2f232a0/brick2
            tmpfs                                                   tmpfs      1436128       0  1436128   0% /run/user/1000
            //Centos72test/cifs_test                                cifs      52155392 4884620 47270772  10% /mnt/cifs_test2

            '''
            output = ""
            # Poll once per second, for at most 300 iterations, until df exits.
            # NOTE(review): stdout is not read while polling; if df ever wrote
            # more than the pipe can buffer it could not exit before the
            # timeout - confirm this cannot happen in practice.
            process_wait_time = 300
            while (df is not None and process_wait_time > 0
                   and df.poll() is None):
                time.sleep(1)
                process_wait_time -= 1
            self.logger.log(
                "df command executed for process wait time value" +
                str(process_wait_time), True)
            # Output is only read once the process has actually finished.
            if (df is not None and df.poll() is not None):
                self.logger.log("df return code" + str(df.returncode), True)
                output = df.stdout.read()
            # Python 3 needs an explicit decode of the bytes read from the pipe.
            if sys.version_info > (3, ):
                output = str(output,
                             encoding='utf-8',
                             errors="backslashreplace")
            else:
                output = str(output)
            output = output.strip().split("\n")
            disk_loop_devices_file_systems = self.get_loop_devices()
            self.logger.log("outside loop device", True)
            # Per-category accumulators, all in KB as reported by `df -k`.
            total_used = 0
            total_used_network_shares = 0
            total_used_gluster = 0
            total_used_loop_device = 0
            total_used_temporary_disks = 0
            total_used_ram_disks = 0
            total_used_unknown_fs = 0
            actual_temp_disk_used = 0
            total_sd_size = 0
            network_fs_types = []
            unknown_fs_types = []

            # Populate the mount/fstype table only if it is still empty.
            # NOTE(review): `disk_util` is not defined inside this method;
            # presumably a module-level name - confirm.
            if len(self.file_systems_info) == 0:
                self.file_systems_info = disk_util.get_mount_file_systems()

            output_length = len(output)
            # Start at 1 so the first line of the df output is never parsed.
            index = 1
            self.resource_disk = ResourceDiskUtil(patching=self.patching,
                                                  logger=self.logger)
            resource_disk_device = self.resource_disk.get_resource_disk_mount_point(
                0)
            resource_disk_device = "/dev/{0}".format(resource_disk_device)
            device_list = self.device_list_for_billing(
            )  #new logic: calculate the disk size for billing

            while index < output_length:
                if (len(output[index].split()) <
                        6):  #when a row is divided in 2 lines
                    # Join this line with the next one; give up on the whole
                    # calculation unless the pair has exactly six fields.
                    index = index + 1
                    if (index < output_length
                            and len(output[index - 1].split()) +
                            len(output[index].split()) == 6):
                        output[index] = output[index - 1] + output[index]
                    else:
                        self.logger.log(
                            "Output of df command is not in desired format",
                            True)
                        total_used = 0
                        size_calc_failed = True
                        break
                device, size, used, available, percent, mountpoint = output[
                    index].split()
                fstype = ''
                isNetworkFs = False
                isKnownFs = False

                # Look up the fstype: entries are indexed as
                # [0]=device, [1]=fstype, [2]=mount point.
                for file_system_info in self.file_systems_info:
                    if device == file_system_info[
                            0] and mountpoint == file_system_info[2]:
                        fstype = file_system_info[1]
                self.logger.log(
                    "Device name : {0} fstype : {1} size : {2} used space in KB : {3} available space : {4} mountpoint : {5}"
                    .format(device, fstype, size, used, available,
                            mountpoint), True)

                # Substring match of the lower-cased fstype against each list.
                for nonPhysicaFsType in self.non_physical_file_systems:
                    if nonPhysicaFsType in fstype.lower():
                        isNetworkFs = True
                        break

                for knownFs in self.known_fs:
                    if knownFs in fstype.lower():
                        isKnownFs = True
                        break

                if device == resource_disk_device and self.isOnlyOSDiskBackupEnabled == False:  # adding log to check difference in billing of temp disk
                    self.logger.log(
                        "Actual temporary disk, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    actual_temp_disk_used = int(used)

                if device in device_list and device != resource_disk_device:
                    self.logger.log(
                        "Adding sd* partition, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_sd_size = total_sd_size + int(
                        used)  # calculate total sd* size, skipping the temp disk

                # Every row with a non-empty, unrecognised fstype is recorded
                # (duplicates are not removed).
                if not (isKnownFs or fstype == '' or fstype == None):
                    unknown_fs_types.append(fstype)

                # Classification chain: the first matching branch wins, and
                # only the final `else` adds to total_used.
                if isNetworkFs:
                    if fstype not in network_fs_types:
                        network_fs_types.append(fstype)
                    self.logger.log(
                        "Not Adding network-drive, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_used_network_shares = total_used_network_shares + int(
                        used)

                # NOTE(review): this branch uses the literal "/dev/sdb1" while
                # actual_temp_disk_used above uses resource_disk_device; the
                # difference between the two is logged after the loop.
                elif device == "/dev/sdb1" and self.isOnlyOSDiskBackupEnabled == False:  #<todo> in some cases root is mounted on /dev/sdb1
                    self.logger.log(
                        "Not Adding temporary disk, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_used_temporary_disks = total_used_temporary_disks + int(
                        used)

                elif "tmpfs" in fstype.lower() or "devtmpfs" in fstype.lower(
                ) or "ramdiskfs" in fstype.lower() or "rootfs" in fstype.lower(
                ):
                    self.logger.log(
                        "Not Adding RAM disks, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_used_ram_disks = total_used_ram_disks + int(used)

                # Loop devices are excluded unless get_loop_devices() listed them.
                elif 'loop' in device and device not in disk_loop_devices_file_systems:
                    self.logger.log(
                        "Not Adding Loop Device , Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_used_loop_device = total_used_loop_device + int(used)

                elif (mountpoint.startswith('/run/gluster/snaps/')):
                    self.logger.log(
                        "Not Adding Gluster Device , Device name : {0} used space in KB : {1} mount point : {2}"
                        .format(device, used, mountpoint), True)
                    total_used_gluster = total_used_gluster + int(used)

                # Devices named like \\host\share or //host/share are treated
                # as network shares even when the fstype did not flag them.
                elif device.startswith('\\\\') or device.startswith('//'):
                    self.logger.log(
                        "Not Adding network-drive as it starts with slahes, Device name : {0} used space in KB : {1} fstype : {2}"
                        .format(device, used, fstype), True)
                    total_used_network_shares = total_used_network_shares + int(
                        used)

                else:
                    # In OS-disk-only mode just the row mounted at '/' counts.
                    if (self.isOnlyOSDiskBackupEnabled == True):
                        if (mountpoint == '/'):
                            total_used = total_used + int(used)
                            self.logger.log(
                                "Adding only root device to size calculation. Device name : {0} used space in KB : {1} mount point : {2} fstype : {3}"
                                .format(device, used, mountpoint,
                                        fstype), True)
                            self.logger.log(
                                "Total Used Space: {0}".format(total_used),
                                True)
                    else:
                        self.logger.log(
                            "Adding Device name : {0} used space in KB : {1} mount point : {2} fstype : {3}"
                            .format(device, used, mountpoint, fstype), True)
                        total_used = total_used + int(used)  #return in KB
                    if not (isKnownFs or fstype == '' or fstype == None):
                        total_used_unknown_fs = total_used_unknown_fs + int(
                            used)

                index = index + 1

            # Telemetry and logging of the per-category totals (KB values are
            # sent to telemetry; log lines multiply by 1024 to show bytes).
            if not len(unknown_fs_types) == 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "unknownFSTypeInDf", str(unknown_fs_types))
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "totalUsedunknownFS", str(total_used_unknown_fs))
                self.logger.log(
                    "Total used space in Bytes of unknown FSTypes : {0}".
                    format(total_used_unknown_fs * 1024), True)

            if total_used_temporary_disks != actual_temp_disk_used:
                self.logger.log(
                    "Billing differenct because of incorrect temp disk: {0}".
                    format(
                        str(total_used_temporary_disks -
                            actual_temp_disk_used)))

            if not len(network_fs_types) == 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "networkFSTypeInDf", str(network_fs_types))
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "totalUsedNetworkShare", str(total_used_network_shares))
                self.logger.log(
                    "Total used space in Bytes of network shares : {0}".format(
                        total_used_network_shares * 1024), True)
            if total_used_gluster != 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "glusterFSSize", str(total_used_gluster))
            if total_used_temporary_disks != 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "tempDisksSize", str(total_used_temporary_disks))
            if total_used_ram_disks != 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "ramDisksSize", str(total_used_ram_disks))
            if total_used_loop_device != 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "loopDevicesSize", str(total_used_loop_device))
            self.logger.log(
                "Total used space in Bytes : {0}".format(total_used * 1024),
                True)
            if total_sd_size != 0:
                Utils.HandlerUtil.HandlerUtility.add_to_telemetery_data(
                    "totalsdSize", str(total_sd_size))
            self.logger.log(
                "Total sd* used space in Bytes : {0}".format(total_sd_size *
                                                             1024), True)

            return total_used * 1024, size_calc_failed  #Converting into Bytes
        except Exception as e:
            # Any failure is reported as "size calculation failed" with a
            # total of zero rather than being raised to the caller.
            errMsg = 'Unable to fetch total used space with error: %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True)
            size_calc_failed = True
            return 0, size_calc_failed
class FsFreezer:
    """Freeze and thaw mounted file systems through the ``safefreeze`` binary.

    ``freeze_safe`` builds the binary's argument list from the discovered
    mounts, takes a cross-process file lock, starts the binary through
    ``FreezeHandler`` and immediately calls ``thaw_safe``. Results are
    reported as ``FreezeResult`` objects whose ``errors`` list is filled in.
    """

    # Directory and file used with fcntl.lockf so that only one process at a
    # time runs the safefreeze binary.
    _LOCK_DIR = '/etc/azure/MicrosoftRecoverySvcsSafeFreezeLock'
    _LOCK_FILE = _LOCK_DIR + '/SafeFreezeLockFile'

    def __init__(self, patching, logger, hutil):
        """Store collaborators and initial state.

        Args:
            patching: forwarded to ``Mounts`` and ``ResourceDiskUtil``.
            logger: object whose ``log`` method receives diagnostics.
            hutil: forwarded to ``FreezeHandler``; also used to read config.

        A failure while constructing ``Mounts`` is logged and leaves
        ``self.mounts`` set to ``None``.
        """
        self.patching = patching
        self.logger = logger
        self.hutil = hutil
        try:
            self.mounts = Mounts(patching=self.patching, logger=self.logger)
        except Exception as e:
            errMsg = 'Failed to retrieve mount points, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True, 'Warning')
            self.logger.log(str(e), True)
            self.mounts = None
        self.frozen_items = set()
        self.unfrozen_items = set()
        self.freeze_handler = FreezeHandler(self.logger, self.hutil)
        self.mount_open_failed = False
        self.resource_disk = ResourceDiskUtil(patching=patching, logger=logger)
        self.skip_freeze = True
        self.isAquireLockSucceeded = True
        # NOTE(review): the retry counter is never reset, so the retry budget
        # is shared across successive freeze_safe calls on the same instance.
        self.getLockRetry = 0
        self.maxGetLockRetry = 5

    def should_skip(self, mount):
        """Return True when ``mount`` must not be passed to the freeze binary.

        A mount is skipped when it is the resource disk mount point, when its
        ``fstype`` is not one of ext3/ext4/xfs/btrfs, or when its ``type`` is
        ``'loop'``.
        """
        resource_disk_mount_point = self.resource_disk.get_resource_disk_mount_point(
        )
        if (resource_disk_mount_point is not None
                and mount.mount_point == resource_disk_mount_point):
            return True
        is_freezable_fs = mount.fstype in ('ext3', 'ext4', 'xfs', 'btrfs')
        return not (is_freezable_fs and mount.type != 'loop')

    def _acquire_safe_freeze_lock(self):
        """Try to take the exclusive, non-blocking safefreeze file lock.

        Retries while ``self.getLockRetry < self.maxGetLockRetry``, sleeping
        one second after each failure (plus 30 seconds before the last
        attempt). Updates ``self.isAquireLockSucceeded`` and
        ``self.getLockRetry``.

        Returns:
            The open, locked file object, or ``None`` when no lock was taken.
        """
        while self.getLockRetry < self.maxGetLockRetry:
            try:
                if not os.path.isdir(self._LOCK_DIR):
                    os.mkdir(self._LOCK_DIR)
                lock_file = open(self._LOCK_FILE, "w")
                self.logger.log(
                    self._LOCK_FILE + " file opened Sucessfully", True)
                try:
                    fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
                    self.logger.log("Aquiring lock succeeded", True)
                    self.isAquireLockSucceeded = True
                    return lock_file
                except Exception:
                    # Do not leak the descriptor when the lock is held elsewhere.
                    lock_file.close()
                    raise
            except Exception as e:
                self.logger.log(
                    "Failed to open file or aquire lock:  " + str(e), True)
                self.isAquireLockSucceeded = False
                self.getLockRetry = self.getLockRetry + 1
                time.sleep(1)
                if (self.getLockRetry == self.maxGetLockRetry - 1):
                    time.sleep(30)
            self.logger.log(
                "Retry to aquire lock count: " + str(self.getLockRetry),
                True)
        return None

    def _release_safe_freeze_lock(self, lock_file):
        """Unlock and close ``lock_file``; best-effort, never raises ``Exception``."""
        if lock_file is None:
            return
        try:
            fcntl.lockf(lock_file, fcntl.LOCK_UN)
        except Exception:
            # Closing the file below releases the lock anyway.
            pass
        try:
            lock_file.close()
        except Exception:
            pass

    def freeze_safe(self, timeout):
        """Run the safefreeze binary on every eligible mount.

        Args:
            timeout: passed to the binary as its first argument (as a string).

        Returns:
            ``(freeze_result, timedout)``. ``freeze_result.errors`` holds a
            message for each failure; ``timedout`` is True when
            ``startproc`` returned 0. When no mount is eligible, an empty
            result is returned without starting the binary.
        """
        self.root_seen = False
        error_msg = ''
        timedout = False
        self.skip_freeze = True
        mounts_to_skip = None
        # Bound before the try blocks so that a config-read failure (logged
        # as a warning) cannot turn into a NameError further down, and the
        # exception handler can always record its error.
        mounts_list_to_skip = []
        freeze_result = FreezeResult()
        try:
            mounts_to_skip = self.hutil.get_strvalue_from_configfile(
                'MountsToSkip', '')
            self.logger.log("skipped mount :" + str(mounts_to_skip), True)
            mounts_list_to_skip = mounts_to_skip.split(',')
        except Exception as e:
            errMsg = 'Failed to read from config, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            self.logger.log(errMsg, True, 'Warning')
        try:
            freezebin = os.path.join(os.getcwd(), os.path.dirname(__file__),
                                     "safefreeze/bin/safefreeze")
            args = [freezebin, str(timeout)]
            for mount in self.mounts.mounts:
                self.logger.log("fsfreeze mount :" + str(mount.mount_point),
                                True)
                if (mount.mount_point == '/'):
                    # Root is remembered here and appended last, below.
                    self.root_seen = True
                    self.root_mount = mount
                elif (mount.mount_point not in mounts_list_to_skip
                      and not self.should_skip(mount)):
                    self.skip_freeze = False
                    args.append(str(mount.mount_point))
            if (self.root_seen and not self.should_skip(self.root_mount)):
                self.skip_freeze = False
                args.append('/')
            self.logger.log("skip freeze is : " + str(self.skip_freeze), True)
            if self.skip_freeze:
                return freeze_result, timedout
            self.logger.log("arg : " + str(args), True)
            self.freeze_handler.reset_signals()
            self.freeze_handler.signal_receiver()
            self.logger.log("proceeded for accepting signals", True)
            if (mounts_to_skip == '/'
                ):  #for continue logging to avoid out of memory issue
                self.logger.enforce_local_flag(True)
            else:
                self.logger.enforce_local_flag(False)

            start_time = datetime.datetime.utcnow()
            lock_file = self._acquire_safe_freeze_lock()
            end_time = datetime.datetime.utcnow()
            self.logger.log(
                "Wait time to aquire lock " + str(end_time - start_time), True)

            sig_handle = None
            if self.isAquireLockSucceeded:
                try:
                    sig_handle = self.freeze_handler.startproc(args)
                    self.thaw_safe()
                finally:
                    # Release the lock even when the binary or thaw raises.
                    self._release_safe_freeze_lock(lock_file)
            # NOTE(review): the lock file is removed even when the lock was
            # not acquired, i.e. possibly while another process still holds
            # it - confirm this is intended.
            try:
                os.remove(self._LOCK_FILE)
            except Exception:
                pass

            self.logger.log(
                "freeze_safe after returning from startproc : sig_handle=" +
                str(sig_handle))
            if (sig_handle != 1):
                if (self.freeze_handler.child is not None):
                    self.log_binary_output()
                if (sig_handle == 0):
                    timedout = True
                    error_msg = "freeze timed-out"
                elif self.mount_open_failed:
                    error_msg = CommonVariables.unable_to_open_err_string
                elif not self.isAquireLockSucceeded:
                    error_msg = "Mount Points already freezed by some other processor"
                else:
                    error_msg = "freeze failed for some mount"
                freeze_result.errors.append(error_msg)
                self.logger.log(error_msg, True, 'Error')
        except Exception as e:
            self.logger.enforce_local_flag(True)
            error_msg = 'freeze failed for some mount with exception, Exception %s, stack trace: %s' % (
                str(e), traceback.format_exc())
            freeze_result.errors.append(error_msg)
            self.logger.log(error_msg, True, 'Error')
        return freeze_result, timedout

    def thaw_safe(self):
        """Signal the freeze binary to thaw and collect its outcome.

        Returns:
            ``(thaw_result, unable_to_sleep)``. ``unable_to_sleep`` is True
            only when the binary had already exited with return code 2. When
            ``self.skip_freeze`` is set, an empty result is returned at once.
        """
        thaw_result = FreezeResult()
        unable_to_sleep = False
        if self.skip_freeze:
            return thaw_result, unable_to_sleep
        child = self.freeze_handler.child
        if (child is None):
            self.logger.log("child already completed", True)
            self.logger.log(
                "****** 7. Error - Binary Process Already Completed", True)
            error_msg = 'snapshot result inconsistent'
            thaw_result.errors.append(error_msg)
        elif (child.poll() is None):
            self.logger.log("child process still running")
            self.logger.log("****** 7. Sending Thaw Signal to Binary")
            child.send_signal(signal.SIGUSR1)
            # Give the binary up to 30 one-second polls to exit after SIGUSR1.
            for _ in range(30):
                if (child.poll() is None):
                    self.logger.log("child still running sigusr1 sent")
                    time.sleep(1)
                else:
                    break
            self.logger.enforce_local_flag(True)
            self.log_binary_output()
            if (child.returncode != 0):
                error_msg = 'snapshot result inconsistent as child returns with failure'
                thaw_result.errors.append(error_msg)
                self.logger.log(error_msg, True, 'Error')
        else:
            # The binary exited before any thaw signal was sent.
            self.logger.log(
                "Binary output after process end when no thaw sent: ", True)
            if (child.returncode == 2):
                error_msg = 'Unable to execute sleep'
                thaw_result.errors.append(error_msg)
                unable_to_sleep = True
            else:
                error_msg = 'snapshot result inconsistent'
                thaw_result.errors.append(error_msg)
            self.logger.enforce_local_flag(True)
            self.log_binary_output()
            self.logger.log(error_msg, True, 'Error')
        self.logger.enforce_local_flag(True)
        return thaw_result, unable_to_sleep

    def log_binary_output(self):
        """Log every line of the binary's stdout until end of stream.

        Sets ``self.mount_open_failed`` when a line contains
        ``"Failed to open:"``.
        """
        self.logger.log(
            "============== Binary output traces start ================= ",
            True)
        while True:
            line = self.freeze_handler.child.stdout.readline()
            # Python 3 pipes yield bytes; decode without failing on bad input.
            if sys.version_info > (3, ):
                line = str(line, encoding='utf-8', errors="backslashreplace")
            else:
                line = str(line)
            if ("Failed to open:" in line):
                self.mount_open_failed = True
            if (line == ''):
                break
            self.logger.log(line.rstrip(), True)
        self.logger.log(
            "============== Binary output traces end ================= ", True)