Esempio n. 1
0
    def collect_results(self):
        """
        Collect the results of every task of the experiment.

        Collection is best-effort: a failure to collect the results of one
        task is logged and the remaining tasks are still processed.

        Raises:
            SrtUtilsException:
                If the experiment has not been started yet or has not
                been stopped yet.
        """
        logger.info('Collecting experiment results')

        if not self.is_started:
            raise SrtUtilsException(
                'Experiment has not been started yet. Can not collect results')

        # This is done to prevent the situation when the experiment is still
        # running and we are trying to collect results before stopping it
        if not self.is_stopped:
            raise SrtUtilsException(
                'Experiment is still running. Can not collect results')

        for task in self.tasks:
            # Use the module logger (not the root `logging` module) so the
            # message goes through the same logger as the rest of the method
            logger.info(f'Collecting task results: {task}')
            # The failure of a single task is only logged, so that results
            # are collected for as many tasks as possible
            try:
                task.obj_runner.collect_results()
            except SrtUtilsException as error:
                logger.error(
                    f'Failed to collect task results: {task}. Reason: {error}')
Esempio n. 2
0
    def _kill(self):
        """
        Kill the process and verify that it is no longer running.

        Does nothing if the process is already marked as stopped or is
        reported as idle.

        Raises:
            SrtUtilsException:
                If the process has not been started yet, or is still
                reported as running one second after the kill request.
        """
        logger.debug(f'Killing process: {self.id}')

        if not self.is_started:
            raise SrtUtilsException(
                'Process has not been started yet. Kill can not be done'
            )

        # Already stopped: there is nothing left to kill
        if self.is_stopped:
            return

        state_before, _ = self.status
        if state_before == Status.idle:
            return

        self.process.kill()
        # Give the process a moment to exit before checking its status again
        time.sleep(1)

        state_after, _ = self.status
        if state_after == Status.running:
            raise SrtUtilsException(f'Process has not been killed: {self.id}')
Esempio n. 3
0
def before_collect_results_checks(
    obj: IObject,
    process: Process,
    collect_results_path: pathlib.Path
):
    """
    Helper function which performs preliminary checks for `LocalRunner` and
    `RemoteRunner` classes before collecting object results.

    Attributes:
        obj:
            Object whose results are going to be collected.
        process:
            Process the object has been run with.
        collect_results_path:
            `pathlib.Path` directory the results are collected into.

    Raises:
        SrtUtilsException:
            If the process has not been started yet, has not been stopped
            yet, or the directory for collecting results does not exist.
    """
    if not process.is_started:
        raise SrtUtilsException(
            f'Process has not been started yet: {obj}. '
            'Can not collect results'
        )

    if not process.is_stopped:
        raise SrtUtilsException(
            f'Process has not been stopped yet: {obj}, {process}. '
            'Can not collect results'
        )

    # It's expected that at this moment directory
    # self.collect_results_path already exists, because it is created
    # in SingleExperimentRunner class
    if not collect_results_path.exists():
        raise SrtUtilsException(
            'There was no directory for collecting results created: '
            f'{collect_results_path}. Can not collect results'
        )

    # If an object has filepath equal to None, it means there should be
    # no output file produced. Identity comparison is used because None
    # is a singleton.
    if obj.filepath is None:
        logger.info('There was no output file expected, nothing to collect')
        return
Esempio n. 4
0
    def _terminate(self):
        """
        Ask the process to terminate by sending it an interrupt signal.

        CTRL_C_EVENT is sent on Windows and SIGINT elsewhere. The status of
        the process is then polled once per second, up to three times.
        Does nothing if the process is already marked as stopped or is
        reported as idle.

        Raises:
            SrtUtilsException:
                If the process has not been started yet, or has not become
                idle after the three status checks.
        """
        logger.debug(f'Terminating process: {self.id}')

        if not self.is_started:
            raise SrtUtilsException(
                'Process has not been started yet. Terminate can not be done'
            )

        if self.is_stopped:
            return

        state, _ = self.status
        if state == Status.idle:
            return

        logger.debug('Sending SIGINT/CTRL_C_EVENT signal')
        if sys.platform == 'win32':
            interrupt = signal.CTRL_C_EVENT
        else:
            interrupt = signal.SIGINT
        self.process.send_signal(interrupt)

        # Wait for the process to react to the signal
        checks_left = 3
        while checks_left > 0:
            time.sleep(1)
            state, _ = self.status
            if state == Status.idle:
                return
            checks_left -= 1

        raise SrtUtilsException(f'Process has not been terminated: {self.id}')
Esempio n. 5
0
    def stop(self):
        """
        Stop the process: try to terminate it first and fall back to
        killing it if the termination fails.

        Does nothing if the process is already marked as stopped. On success
        the process is marked as stopped.

        Raises:
            SrtUtilsException:
                If the process has not been started yet, or if both the
                termination and the kill have failed.
        """
        logger.debug(f'Stopping process: {self.id}')

        if not self.is_started:
            raise SrtUtilsException(
                'Process has not been started yet. Stop can not be done'
            )

        if self.is_stopped:
            return

        # NOTE: There is a problem with terminating processes which use SSH
        # to run a command on a remote server. The problem is in SSH not
        # forwarding a signal (e.g., SIGINT, SIGTERM). As a result, SSH session
        # itself terminates and process.poll() returns None, however
        # an application started from a command continues to work on a remote server.
        # The solution is to use -t option in order to allocate a pseudo-terminal.
        # See https://stackoverflow.com/questions/48419781/work-around-ssh-does-not-forward-signal
        # for details. FIXME: Maybe it is reasonable to add additional check in
        # clean-up actions that the process is not running on a remote server
        # ps -A | grep [process_name]

        # FIXME: However, there is a problem with wrong interpretation of carriage
        # (\r\n) from pseudo-terminal in this case. Check stdout, it is full of b'\r\n'.

        # FIXME: Signals may not work on Windows properly. Might be useful
        # https://stefan.sofa-rockers.org/2013/08/15/handling-sub-process-hierarchies-python-linux-os-x/

        try:
            self._terminate()
        except SrtUtilsException:
            logger.error(f'Failed to terminate process: {self.id}')

            # TODO: (For future) Experiment with this more. If stransmit will not
            # stop after several terminations, there is a problem, and kill() will
            # hide this problem in this case.

            # TODO: (!) There is a problem with tsp, it's actually not killed
            # however process_is_running(process) becomes False

            try:
                self._kill()
            except SrtUtilsException as error:
                logger.error(f'Failed to kill process: {self.id}')
                # Chain the kill failure explicitly so that its reason is
                # reported as the direct cause of this exception
                raise SrtUtilsException(
                    f'Process has not been stopped: {self.id}'
                ) from error

        self.is_stopped = True
Esempio n. 6
0
    def _create_directory(
        dirpath: str,
        username: str,
        host: str
    ):
        """
        Create directory on a remote machine via SSH for saving object
        results before starting the object.

        Attributes:
            dirpath:
                Directory path on the remote machine. It is interpolated
                as is into the `mkdir -p` command.
            username:
                Username on the remote machine to connect through.
            host:
                IP address of the remote machine to connect.

        Raises:
            SrtUtilsException:
                If the SSH connection fails or times out, or if the
                `mkdir` command exits with a non-zero status.
        """
        logger.info(
            '[RemoteRunner] Creating a directory for saving object results '
            f'remotely via SSH. Username: {username}, host: {host}, '
            f'dirpath: {dirpath}'
        )

        try:
            # FIXME: By default Paramiko will attempt to connect to a running
            # SSH agent (Unix style, e.g. a live SSH_AUTH_SOCK, or Pageant if
            # one is on Windows). That's why prompt for login-password is not
            # disabled under condition that password is not configured via
            # connect_kwargs.password
            with fabric.Connection(host=host, user=username) as c:
                # warn=True makes a non-zero exit status come back in
                # `result` instead of raising invoke's UnexpectedExit, so
                # that the check below can report the failure as
                # SrtUtilsException
                result = c.run(f'mkdir -p {dirpath}', warn=True)
        except paramiko.ssh_exception.SSHException as error:
            raise SrtUtilsException(
                f'Directory has not been created: {dirpath}. Exception '
                f'occured ({error.__class__.__name__}): {error}. Check that '
                'ssh-agent has been started before running the script'
            ) from error
        except TimeoutError as error:
            raise SrtUtilsException(
                f'Directory has not been created: {dirpath}. Exception '
                f'occured ({error.__class__.__name__}): {error}. Check that '
                'IP address of the remote machine is correct and the '
                'machine is not down'
            ) from error

        if result.exited != 0:
            raise SrtUtilsException(f'Directory has not been created: {dirpath}')
Esempio n. 7
0
    def start(self):
        """
        Start single experiment.

        Creates the directory for collecting results, then starts the tasks
        one by one in the configured order, optionally sleeping after each
        task start. The experiment is marked as started only after all the
        tasks have been started.

        Raises:
            SrtUtilsException:
                If the experiment has been started already. Exceptions
                raised while creating the directory or starting a task
                are propagated.
        """
        logger.info('Starting single experiment')

        if self.is_started:
            raise SrtUtilsException(
                'Experiment has been started already. Start can not be done')

        self._create_directory(self.collect_results_path)

        for task in self.tasks:
            # Use the module logger (not the root `logging` module) so the
            # message goes through the same logger as the rest of the method
            logger.info(f'Starting task: {task}')
            task.obj_runner.start()
            sleep_after_start = task.sleep_after_start
            if sleep_after_start is not None:
                logger.info(f'Sleeping {sleep_after_start}s after task start')
                time.sleep(sleep_after_start)

        self.is_started = True
Esempio n. 8
0
    def stop(self):
        """
        Stop single experiment.

        The tasks are stopped in reverse order. Stopping is best-effort:
        a failure to stop one task is logged and counted, and the remaining
        tasks are still processed. The experiment is marked as stopped only
        if every task has been stopped.

        Raises:
            SrtUtilsException:
                If the experiment has not been started yet, or if at least
                one task has not been stopped.
        """
        logger.info('Stopping single experiment')
        not_stopped_tasks = 0

        if not self.is_started:
            raise SrtUtilsException(
                'Experiment has not been started yet. Stop can not be done')

        if self.is_stopped:
            logger.info('Experiment has been stopped already. Nothing to do')
            return

        logger.info('Stopping tasks in reversed order')

        # By default, stop the tasks in reverse order
        # TODO: Implement stopping tasks according to the specified stop order.
        # if self.ignore_stop_order:
        for task in reversed(self.tasks):
            # Use the module logger (not the root `logging` module) so the
            # message goes through the same logger as the rest of the method
            logger.info(f'Stopping task: {task}')

            # The failure of a single task is only logged and counted, so
            # that as many tasks as possible are stopped
            try:
                task.obj_runner.stop()
            except SrtUtilsException as error:
                logger.error(f'Failed to stop task: {task}. Reason: {error}')
                not_stopped_tasks += 1
            finally:
                # The sleep is performed whether or not the stop succeeded
                sleep_after_stop = task.sleep_after_stop
                if sleep_after_stop is not None:
                    logger.info(
                        f'Sleeping {sleep_after_stop}s after task stop')
                    time.sleep(sleep_after_stop)

        if not_stopped_tasks != 0:
            raise SrtUtilsException('Not all the tasks have been stopped')

        self.is_stopped = True
def before_collect_results_checks(obj: IObject, process: Process,
                                  collect_results_path: pathlib.Path):
    """
    Validate the preconditions shared by the `LocalRunner` and
    `RemoteRunner` classes before object results are collected.

    The checks are performed in order: the process has been started,
    the process has been stopped, the directory for collecting results
    exists.

    Raises:
        SrtUtilsException:
            If any of the checks fails.
    """
    if not process.is_started:
        reason = (
            f'Process has not been started yet: {obj}. Can not collect results'
        )
        raise SrtUtilsException(reason)

    if not process.is_stopped:
        reason = (
            f'Process has not been stopped yet: {obj}, {process}. '
            'Can not collect results'
        )
        raise SrtUtilsException(reason)

    # The directory is expected to exist already at this moment, because
    # it is created in SingleExperimentRunner class
    if collect_results_path.exists():
        return

    reason = (
        'There was no directory for collecting results created: '
        f'{collect_results_path}. Can not collect results'
    )
    raise SrtUtilsException(reason)
Esempio n. 10
0
    def collect_results(self):
        """
        Read everything available from the process stdout and stderr.

        Returns:
            A `(stdout, stderr)` tuple, each element being the list of
            lines returned by `readlines()` of the respective stream.

        Raises:
            SrtUtilsException:
                If the process has not been started yet.
        """
        if self.is_started:
            captured_stdout = self.process.stdout.readlines()
            captured_stderr = self.process.stderr.readlines()
            return captured_stdout, captured_stderr

        raise SrtUtilsException(
            'Process has not been started yet. Can not collect results'
        )
Esempio n. 11
0
    def _create_directory(dirpath: pathlib.Path):
        """
        Create a fresh local directory the experiment results are saved to.

        Attributes:
            dirpath:
                `pathlib.Path` directory path.

        Raises:
            SrtUtilsException:
                If `create_local_directory` reports that the directory has
                not been created because it already exists.
        """
        logger.info(
            '[SingleExperimentRunner] Creating a local directory '
            f'for saving experiment results: {dirpath}'
        )

        if create_local_directory(dirpath):
            return

        # An existing directory is never reused, so that the results of
        # a previous experiment are not mixed with the new ones
        raise SrtUtilsException(
            f'Directory for saving experiment results already exists: {dirpath}. '
            'Please use non-existing directory name and start the experiment '
            'again. Existing directory contents will not be deleted'
        )
Esempio n. 12
0
def create_local_directory(dirpath: pathlib.Path):
    """
    Helper function used to create the directory locally.

    Missing parent directories are created as well.

    Attributes:
        dirpath:
            `pathlib.Path` directory path.

    Returns:
        True if the directory has been created, False if the path
        already exists.

    Raises:
        SrtUtilsException:
            If the directory can not be created.
    """
    # Attempt the creation directly instead of checking `exists()` first:
    # this avoids the window between the check and the creation in which
    # the path could be created by someone else
    try:
        dirpath.mkdir(parents=True)
    except FileExistsError:
        return False
    except (OSError, ValueError) as error:
        # OSError covers file system failures; ValueError is raised for
        # malformed paths (e.g., an embedded null byte)
        raise SrtUtilsException(
            f'Directory has not been created: {dirpath}. Exception '
            f'occured ({error.__class__.__name__}): {error}') from error

    return True
Esempio n. 13
0
    def clean_up(self):
        """
        Perform cleaning up in case of something has gone wrong during
        the experiment.

        Every task reported as running is stopped; if the stop fails, it
        is retried once. The experiment is marked as stopped only if no
        task has failed to stop on both tries.

        Raises:
            SrtUtilsException:
                If at least one task has not been stopped.
        """
        logger.info('Cleaning up after experiment')
        not_stopped_tasks = 0

        for task in self.tasks:
            # NOTE(review): this expects the runner's `status` to compare
            # equal to a bare `Status` member - confirm against the runner
            # implementation
            if task.obj_runner.status == Status.running:
                # Use the module logger (not the root `logging` module) so
                # the message goes through the same logger as the rest of
                # the method
                logger.info(f'Stopping task: {task}')

                try:
                    task.obj_runner.stop()
                except SrtUtilsException as error:
                    logger.error(
                        f'Failed to stop task: {task}, retrying to stop '
                        f'again. Reason: {error}')

                    try:
                        task.obj_runner.stop()
                    except SrtUtilsException as error:
                        logger.error(
                            f'Failed to stop task on the second try: {task}. '
                            f'Reason: {error}')
                        not_stopped_tasks += 1

        if not_stopped_tasks != 0:
            raise SrtUtilsException(
                'Not all the tasks have been stopped during cleaning up')

        self.is_stopped = True
Esempio n. 14
0
    def collect_results(self):
        """
        Before collecting object results, this function creates a local
        directory `username@host` inside self.collect_results_path directory
        where the results produced by the object are copied.

        Returns early, without copying anything, if the object has no
        output file configured (filepath is None).

        Raises:
            SrtUtilsException:
                If the preliminary checks fail, the output file does not
                exist on the remote machine, the destination file already
                exists locally, or the file can not be downloaded.
        """
        logger.info(f'Collecting object results: {self.obj}, {self.process}')

        before_collect_results_checks(self.obj, self.process,
                                      self.collect_results_path)

        # If an object has filepath equal to None, it means there should be
        # no output file produced
        if self.obj.filepath is None:
            logger.info(
                'There was no output file expected, nothing to collect')
            return

        # If an object has filepath defined, it means there should be
        # an output file produced. However it does not mean that the file
        # was created successfully, that's why we check whether the filepath exists.
        with fabric.Connection(host=self.host, user=self.username) as c:
            if not exists(c, self.obj.filepath):
                # Process output is attached to the error to help finding
                # out why the file has not been produced
                stdout, stderr = self.process.collect_results()
                raise SrtUtilsException(
                    'There was no output file produced by the object: '
                    f'{self.obj}, nothing to collect. Process stdout: '
                    f'{stdout}. Process stderr: {stderr}')

        # Create 'username@host' folder to copy produced by the object file
        # (inside self.collect_results_path directory)
        destination_dir = self.collect_results_path / f'{self.username}@{self.host}'
        logger.info('Creating a local directory for copying object results: '
                    f'{destination_dir}')
        # The return value is deliberately ignored: the directory may already
        # exist if another object has been collected from the same host
        create_local_directory(destination_dir)

        logger.info(f'Copying object results into: {destination_dir}')
        filename = self.obj.filepath.name
        source = self.obj.filepath
        destination = destination_dir / filename

        if destination.exists():
            raise SrtUtilsException(
                'The destination file already exists, there might be a '
                f'file created by the other object: {destination}. File '
                f'with object results was not copied: {self.obj.filepath}')

        # TODO: Implement copying files using rsync
        try:
            # http://docs.fabfile.org/en/2.3/api/transfer.html
            with fabric.Connection(host=self.host, user=self.username) as c:
                # Paths are passed as plain strings.
                # NOTE(review): some Fabric versions appear to call string
                # methods on the local path - confirm against the Fabric
                # version in use
                c.get(str(source), str(destination))
        except OSError as error:
            raise SrtUtilsException(
                f'Object results have not been collected: {self.obj.filepath}'
                f'. Exception occured ({error.__class__.__name__}): {error}. '
            ) from error
        except Exception as error:
            logger.info('Most probably paramiko exception')
            raise SrtUtilsException(
                f'Object results have not been collected: {self.obj.filepath}'
                f'. Exception occured ({error.__class__.__name__}): {error}. '
            ) from error
Esempio n. 15
0
    def collect_results(self):
        """
        Before collecting object results, this function creates a local
        directory `local` inside self.collect_results_path directory
        where the results produced by the object are copied.

        Returns early, without copying anything, if the object has no
        output file configured (filepath is None).

        Raises:
            SrtUtilsException:
                If the preliminary checks fail, the output file has not
                been produced, or the destination file already exists.
        """
        # Function-scope import: only this method needs `shutil`
        import shutil

        logger.info(f'Collecting object results: {self.obj}, {self.process}')

        before_collect_results_checks(self.obj, self.process,
                                      self.collect_results_path)

        # If an object has filepath equal to None, it means there should be
        # no output file produced
        if self.obj.filepath is None:
            logger.info(
                'There was no output file expected, nothing to collect')
            return

        # If an object has filepath defined, it means there should be
        # an output file produced. However it does not mean that the file
        # was created successfully, that's why we check whether the filepath exists.
        if not self.obj.filepath.exists():
            # Process output is attached to the error to help finding out
            # why the file has not been produced
            stdout, stderr = self.process.collect_results()
            raise SrtUtilsException(
                'There was no output file produced by the object: '
                f'{self.obj}, nothing to collect. Process stdout: '
                f'{stdout}. Process stderr: {stderr}')

        # Create 'local' folder to copy produced by the object file
        # (inside self.collect_results_path directory)
        destination_dir = self.collect_results_path / 'local'
        logger.info('Creating a local directory for copying object results: '
                    f'{destination_dir}')
        # The return value is deliberately ignored: the directory may already
        # exist if another local object has been collected before
        create_local_directory(destination_dir)

        # In case there are several tasks run locally by LocalRunner and
        # the tasks have the same names for the output files, the result
        # might be overwritten. That's why the destination file is not
        # deleted beforehand; instead it is opened in exclusive creation
        # mode ('x'), which raises FileExistsError if it already exists.
        # It is necessary to make sure that the file names for different
        # tasks are unique.
        logger.info(f'Copying object results into: {destination_dir}')

        filename = self.obj.filepath.name
        source = self.obj.filepath
        destination = destination_dir / filename

        try:
            # The destination is opened first so that an already existing
            # file is detected before the source is read. The contents are
            # streamed in chunks instead of loading the whole file into memory.
            with destination.open(mode='xb') as fid, \
                    source.open(mode='rb') as src:
                shutil.copyfileobj(src, fid)
        except FileExistsError as error:
            raise SrtUtilsException(
                'The destination file already exists, there might be a '
                f'file created by the other object: {destination}. File '
                f'with object results was not copied: {self.obj.filepath}'
            ) from error
Esempio n. 16
0
    def start(self):
        """
        Start process.

        Launches the command with stdin, stdout and stderr piped, waits for
        a fixed period of time and then checks that the process has not
        terminated in the meantime. On success the process ID is stored
        in `self.id`.

        Raises:
            SrtUtilsException:
                If the process has been started already, can not be
                launched, or is reported as idle after the waiting period.
        """
        logger.debug('Starting process')

        if self.is_started:
            raise SrtUtilsException(
                f'Process has been started already: {self.id}. '
                'Start can not be done'
            )

        popen_kwargs = {
            'stdin': subprocess.PIPE,
            'stdout': subprocess.PIPE,
            'stderr': subprocess.PIPE,
            'bufsize': 1,
        }
        if sys.platform == 'win32':
            # The process is started in a new process group on Windows;
            # presumably this is what allows CTRL_C_EVENT to be sent to it
            # when terminating - see the `subprocess` documentation
            popen_kwargs['universal_newlines'] = False
            popen_kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP

        try:
            self.process = subprocess.Popen(self.args, **popen_kwargs)
        except OSError as error:
            raise SrtUtilsException(
                f'Process has not been started: {self.args}. {error}'
            ) from error

        # Mark the process as started on every platform (previously this
        # was done in the non-Windows branch only)
        self.is_started = True

        # TODO: Adjust timers
        # Check that the process has started successfully and has not terminated
        # because of an error
        if self.via_ssh:
            time.sleep(SSH_CONNECTION_TIMEOUT + 1)
        else:
            # FIXME: Find a better solution, I changed the time from 1 to 5 s,
            # cause it was not enough in case of errors with srt-test-messaging
            # app, e.g. when starting the caller first and there is no listener yet
            # NOTE: A good thing to consider - what would be in case the child process
            # finishes its work earlier than the time specified (5s). It is
            # important to consider especially in case of fsrt and small files
            # transmission.
            time.sleep(5)

        status, returncode = self.status
        if status == Status.idle:
            raise SrtUtilsException(
                f'Process has not been started: {self.args}, returncode: '
                f'{returncode}, stdout: {self.process.stdout.readlines()}, '
                f'stderr: {self.process.stderr.readlines()}'
            )

        self.id = self.process.pid