Esempio n. 1
0
 def test_get_infrastructure_metrics(self):
     """Every metric group of the collected infrastructure metrics is set."""
     collected = CWLLogger(self.tmp_dir).collect_infrastructure_metrics()
     # Same attributes, checked in the same order as before.
     for metric_group in ('cpu_metrics', 'vmemory_metrics',
                          'disk_partition', 'disk_usage'):
         self.assertIsNotNone(getattr(collected, metric_group))
    def test_load(self):
        """Saved logs are loaded most-recent first and ``load(n)`` caps the count."""
        import shutil

        def make_tmp_dir():
            # Temporary directories are removed when the test finishes so that
            # repeated runs do not leave stale directories behind.
            path = tempfile.mkdtemp()
            self.addCleanup(shutil.rmtree, path, ignore_errors=True)
            return path

        logger_storage = CWLLoggerStorageManager(make_tmp_dir())
        cwllogger = CWLLogger(make_tmp_dir())
        data = cwllogger.to_dict()
        logger_storage.save(data)

        loaded_logs = list(logger_storage.load())
        self.assertListEqual([data], loaded_logs)

        data2 = cwllogger.to_dict()
        logger_storage.save(data2)
        # Results are ordered by modification time, so the most recently
        # saved entry must come first.
        loaded_logs = list(logger_storage.load())
        d = [data2, data]
        self.assertListEqual(
            d, loaded_logs,
            f'Results are not the same: ---------\n{json.dumps(d, indent=2)}\n-------\n{json.dumps(loaded_logs, indent=2)}'
        )

        data3 = CWLLogger(make_tmp_dir()).to_dict()
        logger_storage.save(data3)
        # With a limit of 2 only the two most recently saved entries are
        # expected back, again most recent first.
        loaded_logs = list(logger_storage.load(2))
        self.assertListEqual([data3, data2], loaded_logs)
Esempio n. 3
0
 def test_to_dict(self):
     """The dictionary form of a logger validates against the logger schema."""
     serialized = CWLLogger(self.tmp_dir).to_dict()
     import jsonschema
     # The schema lives in the ``cwlkernel`` directory next to this test's
     # parent directory.
     tests_dir = os.path.dirname(os.path.realpath(__file__))
     schema_file = os.path.join(tests_dir, '..', 'cwlkernel',
                                'loggerSchema.schema.json')
     with open(schema_file) as schema_fd:
         logger_schema = json.load(schema_fd)
     jsonschema.validate(serialized, logger_schema)
Esempio n. 4
0
 def __init__(self, **kwargs):
     """Set up the file managers, the executor and the logger of the kernel.

     :param kwargs: forwarded unchanged to the parent class constructor
     """
     super().__init__(**kwargs)
     boot_directory = CWLExecuteConfigurator().CWLKERNEL_BOOT_DIRECTORY
     results_directory = os.sep.join([boot_directory, 'results'])
     runtime_directory = os.sep.join([boot_directory, 'runtime_data'])
     logs_directory = os.path.join(boot_directory, 'logs')

     # YAML documents accumulated as input data.
     self._yaml_input_data: List[str] = []
     self._results_manager = IOFileManager(results_directory)
     self._cwl_executor = CoreExecutor(IOFileManager(runtime_directory))
     # (own process id, parent process id)
     self._pid = (os.getpid(), os.getppid())
     self._cwl_logger = CWLLogger(logs_directory)
     # Record the process ids on the logger before the first save.
     self._set_process_ids()
     self._cwl_logger.save()
Esempio n. 5
0
 def test_get_running_kernels(self):
     """A freshly spawned kernel process is listed by ``get_running_kernels``."""
     import subprocess
     import sys
     # Spawn the kernel with the interpreter that runs the tests; a bare
     # "python" may resolve to a different interpreter or not exist at all.
     kernel = subprocess.Popen([sys.executable, "-m", "cwlkernel"])
     try:
         # Errors are deliberately not swallowed: a failure here should
         # surface with its real traceback rather than as a misleading
         # "pid not in []" assertion failure.
         pids = CWLLogger.get_running_kernels()
     finally:
         kernel.kill()
         # Reap the child so no zombie process is left behind.
         kernel.wait()
     self.assertIn(kernel.pid, pids)
Esempio n. 6
0
 def test_get_running_kernels(self):
     """The pid of a spawned kernel appears in ``get_running_kernels``."""
     import subprocess
     import sys
     kernel = subprocess.Popen([sys.executable, "-m", "cwlkernel"])
     try:
         # Let any exception propagate: printing it and carrying on would
         # only turn the real error into an unrelated assertion failure.
         pids = CWLLogger.get_running_kernels()
     finally:
         kernel.kill()
         # Wait for the killed child so it is reaped before the test ends.
         kernel.wait()
     self.assertIn(kernel.pid, pids)
    def test_save(self):
        """A saved log is written to a file and validates against the schema."""
        import shutil

        import jsonschema
        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   '..', 'cwlkernel',
                                   'loggerSchema.schema.json')
        with open(schema_path) as f:
            schema = json.load(f)

        storage_path = tempfile.mkdtemp()
        # Remove the temporary storage directory once the test is over.
        self.addCleanup(shutil.rmtree, storage_path, ignore_errors=True)
        logger_storage_manager = CWLLoggerStorageManager(storage_path)

        new_file = logger_storage_manager.save(
            CWLLogger(storage_path).to_dict())

        self.assertTrue(os.path.isfile(new_file))
        # Only one entry was saved, so the first loaded entry is that one.
        log_data = list(logger_storage_manager.load(1))[0]
        jsonschema.validate(log_data, schema)
Esempio n. 8
0
 def test_get_logs_from_past_kernels(self):
     """After one save, ``load`` yields exactly that logger's process ids."""
     import shutil

     new_dir = tempfile.mkdtemp()
     # Remove the temporary log directory once the test is over.
     self.addCleanup(shutil.rmtree, new_dir, ignore_errors=True)
     logger = CWLLogger(new_dir)
     logger.save()
     self.assertListEqual(
         [logger.to_dict()['process_id']],
         [entry['process_id'] for entry in logger.load()])
Esempio n. 9
0
 def test_get_hostname(self):
     """The logger reports the same non-empty hostname as ``socket``."""
     reported = CWLLogger(self.tmp_dir).get_hostname()
     expected = socket.gethostname()
     self.assertEqual(reported, expected)
     name_length = len(reported)
     self.assertTrue(name_length > 0)
     self.assertIsInstance(reported, str)
Esempio n. 10
0
class CWLKernel(Kernel):
    """Kernel that collects YAML input data and executes CWL workflows.

    A cell is handled in one of three ways:

    * ``% <command> [args]`` with a command listed in ``_magic_commands`` is
      dispatched to the matching ``_execute_magic_<command>`` method;
    * a YAML mapping containing ``cwlVersion`` is executed as a workflow with
      the input data accumulated so far, which is then cleared;
    * any other YAML mapping is stored as input data for the next workflow.
    """
    implementation = 'CWLKernel'
    implementation_version = '0.1'
    language_version = '1.0'
    language_info = {
        'name': 'yaml',
        'mimetype': 'text/x-cwl',
        'file_extension': '.cwl',
    }
    banner = "Common Workflow Language"

    # Names accepted after the "% " prefix; each one needs a matching
    # ``_execute_magic_<name>`` method.
    _magic_commands = frozenset(['logs', 'data', 'display_data'])

    def __init__(self, **kwargs):
        """Set up the file managers, the executor and the logger.

        :param kwargs: forwarded unchanged to the parent class constructor
        """
        super().__init__(**kwargs)
        conf = CWLExecuteConfigurator()
        # YAML documents accumulated as input data for the next workflow.
        self._yaml_input_data: List[str] = []
        self._results_manager = IOFileManager(
            os.sep.join([conf.CWLKERNEL_BOOT_DIRECTORY, 'results']))
        runtime_file_manager = IOFileManager(
            os.sep.join([conf.CWLKERNEL_BOOT_DIRECTORY, 'runtime_data']))
        self._cwl_executor = CoreExecutor(runtime_file_manager)
        # (own process id, parent process id)
        self._pid = (os.getpid(), os.getppid())
        self._cwl_logger = CWLLogger(
            os.path.join(conf.CWLKERNEL_BOOT_DIRECTORY, 'logs'))
        self._set_process_ids()
        self._cwl_logger.save()

    def _set_process_ids(self):
        """Store the current and parent process ids on the logger."""
        self._cwl_logger.process_id = {
            "process_id": os.getpid(),
            "parent_process_id": os.getppid()
        }

    def _code_is_valid_yaml(self, code) -> Optional[Dict]:
        """Parse ``code`` with the safe YAML loader.

        :param code: the text of the cell
        :return: the parsed document, or None when parsing raises. Note that
            the parsed document is not necessarily a mapping (a scalar or a
            list is valid YAML as well), and an empty document parses to None.
        """
        # Named ``parser`` so the module-level ``yaml`` name used elsewhere in
        # this class is not shadowed.
        parser = YAML(typ='safe')
        try:
            return parser.load(code)
        except Exception:
            return None

    def _is_magic_command(self, code: str) -> bool:
        """Tell whether ``code`` is ``% <known command> [args...]``.

        :param code: the text of the cell
        :return: True when the cell starts with "% " and its second
            whitespace-separated token is a known magic command
        """
        split_code = code.split()
        return (len(split_code) >= 2
                and code.startswith("% ")
                and split_code[1] in self._magic_commands)

    def _execution_reply(self, status: str) -> Dict:
        """Build the reply dictionary returned by :meth:`do_execute`.

        :param status: 'ok' or 'error'
        """
        return {
            'status': status,
            # The base class increments the execution count
            'execution_count': self.execution_count,
            'payload': [],
            'user_expressions': {},
        }

    def do_execute(self,
                   code: str,
                   silent=False,
                   store_history: bool = True,
                   user_expressions=None,
                   allow_stdin: bool = False) -> Dict:
        """Execute one cell.

        :param code: the text of the cell
        :return: the execution reply; its status is 'error' when the cell is
            not a YAML mapping or when accumulating data / executing the
            workflow reported an exception, 'ok' otherwise
        """
        if self._is_magic_command(code):
            self._execute_magic_command(code)
            return self._execution_reply('ok')

        dict_code = self._code_is_valid_yaml(code)
        # Unparsable input yields None; valid YAML that is not a mapping
        # (scalar, list) cannot be CWL or input data either and used to raise
        # AttributeError in ``_is_cwl``. Both are reported as errors.
        if not isinstance(dict_code, dict):
            return self._execution_reply('error')

        if self._is_cwl(dict_code):
            exception = self._execute_workflow(code)
            self._clear_data()
        else:
            exception = self._accumulate_data(code)

        if exception is None:
            return self._execution_reply('ok')

        self.send_response(
            self.iopub_socket, 'stream', {
                'name': 'stderr',
                'text': f'{type(exception).__name__}: {exception}'
            })
        return self._execution_reply('error')

    def _execute_magic_command(self, command: str):
        """Dispatch ``% <name> [args...]`` to ``_execute_magic_<name>``.

        The handler receives the remaining tokens as a single list argument.
        Callers are expected to check the command with
        :meth:`_is_magic_command` first.
        """
        tokens = command.split()
        command_name = tokens[1]
        args = tokens[2:]
        getattr(self, f'_execute_magic_{command_name}')(args)

    def _execute_magic_display_data(self, data_name):
        """Send the content of the result file whose id is ``data_name[0]``.

        :param data_name: list of magic command arguments; exactly one string
            (the output id) is expected
        """
        if len(data_name) != 1 or not isinstance(data_name[0], str):
            self._send_error_response(
                'ERROR: you must select an output to display. Correct format:\n % display_data [output name]'
            )
            return
        # Registry items are (key, metadata) pairs; the key is opened as a
        # file path below and the metadata carries the output id.
        results = [
            item
            for item in self._results_manager.get_files_registry().items()
            if item[1]['id'] == data_name[0]
        ]
        if len(results) != 1:
            self.send_response(self.iopub_socket, 'stream', {
                'name': 'stderr',
                'text': 'Result not found'
            })
            return
        file_path = results[0][0]
        with open(file_path) as f:
            data = f.read()
        self.send_response(self.iopub_socket, 'stream', {
            'name': 'stdout',
            'text': data
        })

    def _send_error_response(self, text):
        """Send ``text`` to the client as a stream message.

        NOTE(review): the message is emitted on the 'stdout' stream although
        it carries errors; kept unchanged because clients may rely on it.
        """
        self.send_response(self.iopub_socket, 'stream', {
            'name': 'stdout',
            'text': text
        })

    def _send_json_response(self, json_data: Union[Dict, List]):
        """Send ``json_data`` to the client as a collapsed JSON display item."""
        self.send_response(
            self.iopub_socket, 'display_data', {
                'data': {
                    'text/plain': '<IPython.core.display.JSON object>',
                    'application/json': json_data
                },
                'metadata': {
                    'application/json': {
                        'expanded': False,
                        'root': 'root'
                    }
                }
            })

    def _execute_magic_logs(self, limit=None):
        """Send the stored logs, optionally limited to a number of entries.

        :param limit: the list of magic command arguments (its first element
            is the limit), or None / an empty list for no limit. A bare
            integer or numeric string is accepted as well.
        """
        logger.debug('Execute logs magic command')
        # The magic dispatcher passes the arguments as a list; the default
        # None used to fail on ``len(None)``.
        if isinstance(limit, (list, tuple)):
            limit = limit[0] if limit else None
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                # Report a bad argument to the user instead of letting the
                # exception escape from do_execute.
                self._send_error_response(
                    'ERROR: the limit must be an integer. Correct format:\n % logs [limit]'
                )
                return
        self._send_json_response(list(self._cwl_logger.load(limit)))

    def _execute_magic_data(self, *args):
        """Send an HTML list linking to every past result file.

        :param args: ignored
        """
        data = "<ul>\n" + '\n'.join([
            f'\t<li><a href="file://{d}" target="_empty">{d}</a></li>'
            for d in self.get_past_results()
        ]) + "\n</ul>"
        self.send_response(
            self.iopub_socket, 'display_data', {
                'data': {
                    'text/html': data
                },
                'metadata': {
                    'application/json': {
                        'expanded': False,
                        'root': 'root'
                    }
                }
            })

    def _accumulate_data(self, code: str) -> Optional[Exception]:
        """Validate ``code`` as input data and keep it for the next workflow.

        :param code: YAML text describing input data
        :return: the FileNotFoundError raised by the validation, or None when
            the data has been stored
        """
        cwl = self._cwl_executor.file_manager.get_files_uri().path
        try:
            # NOTE(review): ``yaml.Loader`` is the full, unsafe loader. The
            # text comes straight from the user's cell; consider a safe
            # loader if the data never needs custom tags.
            self._cwl_executor.validate_input_files(
                yaml.load(code, Loader=yaml.Loader), cwl)
        except FileNotFoundError as e:
            return e
        self._yaml_input_data.append(code)
        self.send_response(self.iopub_socket, 'stream', {
            'name': 'stdout',
            'text': 'Add data in memory'
        })
        return None

    def _clear_data(self):
        """Forget all accumulated input data."""
        self._yaml_input_data = []

    def _register_output(self, output: Dict, output_id: str, directory: str):
        """Tag ``output`` with ``output_id`` and register its file.

        :param output: one output description; its 'location' entry is the
            file handed to the results manager
        :param output_id: identifier stored under the 'id' key of ``output``
        :param directory: directory name passed to the results manager
        """
        output['id'] = output_id
        self._results_manager.append_files(
            [output['location']], directory, metadata=output)

    def _execute_workflow(self, code) -> Optional[Exception]:
        """Run the workflow in ``code`` with the accumulated input data.

        Every output is given an 'id' (the output name, or ``<name>_<n>``
        counting from 1 for list outputs), registered in the results manager
        under a directory named after the run id, and the results are sent to
        the client as JSON.

        :param code: the text of the CWL document
        :return: the exception reported by the executor, or None
        """
        self._cwl_executor.set_data(self._yaml_input_data)
        self._cwl_executor.set_workflow(code)
        logger.debug('starting executing workflow ...')
        run_id, results, exception = self._cwl_executor.execute()
        logger.debug(f'\texecution results: {run_id}, {results}, {exception}')
        output_directory_for_that_run = str(run_id)
        for output in results:
            if isinstance(results[output], list):
                for i, output_i in enumerate(results[output], start=1):
                    self._register_output(output_i, f'{output}_{i}',
                                          output_directory_for_that_run)
            else:
                self._register_output(results[output], output,
                                      output_directory_for_that_run)
        self._send_json_response(results)
        if exception is not None:
            logger.debug(f'execution error: {exception}')
            self.send_response(self.iopub_socket, 'stream', {
                'name': 'stderr',
                'text': str(exception)
            })
            return exception
        return None

    def get_past_results(self) -> List[str]:
        """Return the files known to the results manager."""
        return self._results_manager.get_files()

    def _is_cwl(self, code: Dict) -> bool:
        """Tell whether the parsed document is a CWL description.

        :param code: the parsed YAML document
        :return: True when ``code`` is a mapping with a 'cwlVersion' key;
            False for anything else, including non-mapping documents
        """
        return isinstance(code, dict) and 'cwlVersion' in code

    def get_pid(self) -> Tuple[int, int]:
        """
        :return: The process id and its parent's id
        """
        return self._pid