Example 1
    def start(self):
        if not _mpi_enabled:
            raise OptionalModuleMissing("mpi4py", "Cannot initialize ExtremeScaleExecutor without mpi4py")
        else:
            # This is only to stop flake8 from complaining
            logger.debug("MPI version :{}".format(mpi4py.__version__))

        super().start()
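
For context, _mpi_enabled is not defined in this excerpt; below is a minimal sketch of the guarded-import pattern that typically backs such a flag, assuming the usual try/except convention:

try:
    import mpi4py
    _mpi_enabled = True
except ImportError:
    # mpi4py is an optional dependency; record its absence so callers
    # can raise OptionalModuleMissing with a helpful message
    _mpi_enabled = False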
Example 2
    def __init__(self,
                 hostname,
                 username=None,
                 script_dir=None,
                 envs=None,
                 port=22):
        ''' Initialize a persistent connection to the remote system.
        By this point we should know whether SSH connectivity is possible.

        Args:
            - hostname (String) : Hostname

        KWargs:
            - username (string) : Username on remote system
            - script_dir (string) : Full path to a script dir to which
              generated scripts can be sent
            - envs (dict) : A dictionary of env variables to be set when executing commands
            - port (int) : Port at which the SSHService is running

        Raises:
            - OptionalModuleMissing : If the oauth_ssh module is not installed
        '''
        if not _oauth_ssh_enabled:
            raise OptionalModuleMissing(
                ['oauth_ssh'],
                "OauthSSHChannel requires oauth_ssh module and config.")

        self.hostname = hostname
        self.username = username
        self.script_dir = script_dir
        self.port = port
        self.envs = {}
        if envs is not None:
            self.envs = envs

        try:
            access_token = find_access_token(hostname)
        except Exception:
            logger.exception(
                "Failed to find the access token for {}".format(hostname))
            raise

        try:
            self.service = SSHService(hostname, port)
            self.transport = self.service.login(access_token, username)

        except Exception:
            logger.exception(
                "Caught an exception in the OAuth authentication step with {}".
                format(hostname))
            raise

        self.sftp_client = paramiko.SFTPClient.from_transport(self.transport)
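
A hypothetical usage sketch (the hostname, username, and paths below are placeholders, not taken from the source):

channel = OauthSSHChannel("cluster.example.edu",
                          username="alice",
                          script_dir="/home/alice/scripts")
# The paramiko SFTP client opened in __init__ can then transfer files
channel.sftp_client.put("submit.sh", "/home/alice/scripts/submit.sh")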
Example 3
    def __init__(self,
                 label='ExtremeScaleExecutor',
                 provider=LocalProvider(),
                 launch_cmd=None,
                 address="127.0.0.1",
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 interchange_port_range=(55000, 56000),
                 storage_access=None,
                 working_dir=None,
                 worker_debug=False,
                 ranks_per_node=1,
                 heartbeat_threshold=120,
                 heartbeat_period=30,
                 managed=True):

        super().__init__(label=label,
                         provider=provider,
                         launch_cmd=launch_cmd,
                         address=address,
                         worker_ports=worker_ports,
                         worker_port_range=worker_port_range,
                         interchange_port_range=interchange_port_range,
                         storage_access=storage_access,
                         working_dir=working_dir,
                         worker_debug=worker_debug,
                         heartbeat_threshold=heartbeat_threshold,
                         heartbeat_period=heartbeat_period,
                         managed=managed)

        if not _mpi_enabled:
            raise OptionalModuleMissing(
                "mpi4py",
                "Cannot initialize ExtremeScaleExecutor without mpi4py")
        else:
            # This is only to stop flake8 from complaining
            logger.debug("MPI version :{}".format(mpi4py.__version__))

        self.ranks_per_node = ranks_per_node

        logger.debug("Initializing ExtremeScaleExecutor")

        if not launch_cmd:
            self.launch_cmd = (
                "mpiexec -np {ranks_per_node} mpi_worker_pool.py "
                "{debug} "
                "--task_url={task_url} "
                "--result_url={result_url} "
                "--logdir={logdir} "
                "--hb_period={heartbeat_period} "
                "--hb_threshold={heartbeat_threshold} ")
        self.worker_debug = worker_debug
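
The default launch_cmd is a str.format template; here is a self-contained sketch of how it might be expanded, with every value below being hypothetical:

launch_cmd = ("mpiexec -np {ranks_per_node} mpi_worker_pool.py "
              "{debug} "
              "--task_url={task_url} "
              "--result_url={result_url} "
              "--logdir={logdir} "
              "--hb_period={heartbeat_period} "
              "--hb_threshold={heartbeat_threshold} ")

# Hypothetical values, for illustration only
print(launch_cmd.format(ranks_per_node=2,
                        debug="--debug",
                        task_url="tcp://127.0.0.1:55001",
                        result_url="tcp://127.0.0.1:55002",
                        logdir="runinfo/000",
                        heartbeat_period=30,
                        heartbeat_threshold=120))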
Example 4
    def __init__(self,
                 image: str,
                 namespace: str = 'default',
                 nodes_per_block: int = 1,
                 init_blocks: int = 4,
                 min_blocks: int = 0,
                 max_blocks: int = 10,
                 max_cpu: float = 2,
                 max_mem: str = "500Mi",
                 init_cpu: float = 1,
                 init_mem: str = "250Mi",
                 parallelism: float = 1,
                 worker_init: str = "",
                 pod_name: Optional[str] = None,
                 user_id: Optional[str] = None,
                 group_id: Optional[str] = None,
                 run_as_non_root: bool = False,
                 secret: Optional[str] = None,
                 persistent_volumes: List[Tuple[str, str]] = []) -> None:
        if not _kubernetes_enabled:
            raise OptionalModuleMissing(
                ['kubernetes'],
                "Kubernetes provider requires kubernetes module and config.")
        config.load_kube_config()

        self.namespace = namespace
        self.image = image
        self.nodes_per_block = nodes_per_block
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.max_cpu = max_cpu
        self.max_mem = max_mem
        self.init_cpu = init_cpu
        self.init_mem = init_mem
        self.parallelism = parallelism
        self.worker_init = worker_init
        self.secret = secret
        self.pod_name = pod_name
        self.user_id = user_id
        self.group_id = group_id
        self.run_as_non_root = run_as_non_root
        self.persistent_volumes = persistent_volumes

        self.kube_client = client.CoreV1Api()

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources = {}  # type: Dict[object, Dict[str, Any]]
Example 5
def get_db_logger(logger_name='parsl_db_logger',
                  is_logging_server=False,
                  monitoring_config=None,
                  **kwargs):
    """
    Parameters
    ----------
    logger_name : str, optional
        Name of the logger to use. Prevents adding duplicate or incorrect handlers.
    is_logging_server : bool, optional
        Used internally to determine which handler to return when using local db logging.
    monitoring_config : MonitoringConfig, optional
        Monitoring configuration object; determines which database handler is attached.

    Returns
    -------
    logging.Logger object

    Raises
    ------
    OptionalModuleMissing

    """
    logger = logging.getLogger(logger_name)
    if monitoring_config is None:
        logger.addHandler(NullHandler())
        return logger

    if monitoring_config.database_type == 'elasticsearch':
        if not _es_logging_enabled:
            raise OptionalModuleMissing(
                ['CMRESHandler'],
                "Logging to ElasticSearch requires the cmreslogging module")

        handler = CMRESHandler(hosts=[{
            'host': monitoring_config.host,
            'port': monitoring_config.port
        }],
                               use_ssl=monitoring_config.enable_ssl,
                               auth_type=CMRESHandler.AuthType.NO_AUTH,
                               es_index_name=monitoring_config.index_name,
                               es_additional_fields={
                                   'Campaign': "test",
                                   'Version': monitoring_config.version,
                                   'Username': getpass.getuser()
                               })
        logger = logging.getLogger(monitoring_config.logger_name)
        logger.setLevel(logging.INFO)
        logger.addHandler(handler)
    elif monitoring_config.database_type == 'local_database' and not is_logging_server:
        # add a handler that will pass logs to the logging server
        handler = RemoteHandler(monitoring_config.web_app_host,
                                monitoring_config.web_app_port)
        # use the specific name generated by the server or the monitor wrapper
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.INFO)
        logger.addHandler(handler)
    elif monitoring_config.database_type == 'local_database' and is_logging_server:
        # add a handler that will take logs being received on the server and log them to the database
        handler = DatabaseHandler(monitoring_config.eng_link)
        # use the specific name generated by the server or the monitor wrapper
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.INFO)
        logger.addHandler(handler)
    else:
        raise ValueError(
            'database_type must be one of ["local_database", "elasticsearch"]')

    return logger
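
A minimal usage sketch: when monitoring_config is left as None, the function attaches a NullHandler, so log records are silently discarded:

logger = get_db_logger()  # monitoring_config defaults to None
logger.info("this record is dropped by the NullHandler")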
Example 6
class Database:

    if not _sqlalchemy_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy'],
            ("Default database logging requires the sqlalchemy library."
             " Enable monitoring support with: pip install parsl[monitoring]"))
    if not _sqlalchemy_utils_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy_utils'],
            ("Default database logging requires the sqlalchemy_utils library."
             " Enable monitoring support with: pip install parsl[monitoring]"))

    Base = declarative_base()

    def __init__(
        self,
        url: str = 'sqlite:///monitoring.db',
    ):

        self.eng = sa.create_engine(url)
        self.meta = self.Base.metadata

        self.meta.create_all(self.eng)
        self.meta.reflect(bind=self.eng)

        Session = sessionmaker(bind=self.eng)
        self.session = Session()

    def update(self, *, table: str, columns: List[str],
               messages: List[Dict[str, Any]]) -> None:
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj,
                                           columns=columns,
                                           messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_update_mappings(mapper, mappings)
        self.session.commit()

    def insert(self, *, table: str, messages: List[Dict[str, Any]]) -> None:
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_insert_mappings(mapper, mappings)
        self.session.commit()

    def rollback(self) -> None:
        self.session.rollback()

    def _generate_mappings(
            self,
            table: Table,
            columns: Optional[List[str]] = None,
            messages: List[Dict[str, Any]] = []) -> List[Dict[str, Any]]:
        mappings = []
        for msg in messages:
            m = {}
            if columns is None:
                columns = table.c.keys()
            for column in columns:
                m[column] = msg.get(column, None)
            mappings.append(m)
        return mappings

    class Workflow(Base):
        __tablename__ = WORKFLOW
        run_id = Column(Text, nullable=False, primary_key=True)
        workflow_name = Column(Text, nullable=True)
        workflow_version = Column(Text, nullable=True)
        time_began = Column(DateTime, nullable=False)
        time_completed = Column(DateTime, nullable=True)
        host = Column(Text, nullable=False)
        user = Column(Text, nullable=False)
        rundir = Column(Text, nullable=False)
        tasks_failed_count = Column(Integer, nullable=False)
        tasks_completed_count = Column(Integer, nullable=False)

    class Status(Base):
        __tablename__ = STATUS
        task_id = Column(Integer,
                         sa.ForeignKey('task.task_id'),
                         nullable=False)
        task_status_name = Column(Text, nullable=False)
        timestamp = Column(DateTime, nullable=False)
        run_id = Column(Text, sa.ForeignKey('workflow.run_id'), nullable=False)
        try_id = Column('try_id', Integer, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id',
                                               'task_status_name',
                                               'timestamp'), )

    class Task(Base):
        __tablename__ = TASK
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)
        task_depends = Column('task_depends', Text, nullable=True)
        task_func_name = Column('task_func_name', Text, nullable=False)
        task_memoize = Column('task_memoize', Text, nullable=False)
        task_hashsum = Column('task_hashsum', Text, nullable=True)
        task_inputs = Column('task_inputs', Text, nullable=True)
        task_outputs = Column('task_outputs', Text, nullable=True)
        task_stdin = Column('task_stdin', Text, nullable=True)
        task_stdout = Column('task_stdout', Text, nullable=True)
        task_stderr = Column('task_stderr', Text, nullable=True)

        task_time_invoked = Column('task_time_invoked',
                                   DateTime,
                                   nullable=True)

        task_time_returned = Column('task_time_returned',
                                    DateTime,
                                    nullable=True)

        task_fail_count = Column('task_fail_count', Integer, nullable=False)

        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id'), )

    class Try(Base):
        __tablename__ = TRY
        try_id = Column('try_id', Integer, nullable=False)
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)

        hostname = Column('hostname', Text, nullable=True)

        task_executor = Column('task_executor', Text, nullable=False)

        task_try_time_launched = Column('task_try_time_launched',
                                        DateTime,
                                        nullable=True)

        task_try_time_running = Column('task_try_time_running',
                                       DateTime,
                                       nullable=True)

        task_try_time_returned = Column('task_try_time_returned',
                                        DateTime,
                                        nullable=True)

        task_fail_history = Column('task_fail_history', Text, nullable=True)

        task_joins = Column('task_joins', Text, nullable=True)

        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id',
                                               'run_id'), )

    class Node(Base):
        __tablename__ = NODE
        id = Column('id',
                    Integer,
                    nullable=False,
                    primary_key=True,
                    autoincrement=True)
        run_id = Column('run_id', Text, nullable=False)
        hostname = Column('hostname', Text, nullable=False)
        uid = Column('uid', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        cpu_count = Column('cpu_count', Integer, nullable=False)
        total_memory = Column('total_memory', Integer, nullable=False)
        active = Column('active', Boolean, nullable=False)
        worker_count = Column('worker_count', Integer, nullable=False)
        python_v = Column('python_v', Text, nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        last_heartbeat = Column('last_heartbeat', DateTime, nullable=False)

    class Block(Base):
        __tablename__ = BLOCK
        run_id = Column('run_id', Text, nullable=False)
        executor_label = Column('executor_label', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        job_id = Column('job_id', Text, nullable=True)
        timestamp = Column('timestamp', DateTime, nullable=False)
        status = Column("status", Text, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('run_id', 'block_id',
                                               'executor_label',
                                               'timestamp'), )

    class Resource(Base):
        __tablename__ = RESOURCE
        try_id = Column('try_id',
                        Integer,
                        sa.ForeignKey('try.try_id'),
                        nullable=False)
        task_id = Column('task_id',
                         Integer,
                         sa.ForeignKey('task.task_id'),
                         nullable=False)
        run_id = Column('run_id',
                        Text,
                        sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        resource_monitoring_interval = Column('resource_monitoring_interval',
                                              Float,
                                              nullable=True)
        psutil_process_pid = Column('psutil_process_pid',
                                    Integer,
                                    nullable=True)
        psutil_process_cpu_percent = Column('psutil_process_cpu_percent',
                                            Float,
                                            nullable=True)
        psutil_process_memory_percent = Column('psutil_process_memory_percent',
                                               Float,
                                               nullable=True)
        psutil_process_children_count = Column('psutil_process_children_count',
                                               Float,
                                               nullable=True)
        psutil_process_time_user = Column('psutil_process_time_user',
                                          Float,
                                          nullable=True)
        psutil_process_time_system = Column('psutil_process_time_system',
                                            Float,
                                            nullable=True)
        psutil_process_memory_virtual = Column('psutil_process_memory_virtual',
                                               Float,
                                               nullable=True)
        psutil_process_memory_resident = Column(
            'psutil_process_memory_resident', Float, nullable=True)
        psutil_process_disk_read = Column('psutil_process_disk_read',
                                          Float,
                                          nullable=True)
        psutil_process_disk_write = Column('psutil_process_disk_write',
                                           Float,
                                           nullable=True)
        psutil_process_status = Column('psutil_process_status',
                                       Text,
                                       nullable=True)
        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id', 'run_id',
                                               'timestamp'), )
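
A hypothetical usage sketch, assuming the WORKFLOW constant equals 'workflow' (the table-name constants are not shown in this excerpt):

from datetime import datetime

db = Database(url='sqlite:///monitoring.db')
# Supply every non-nullable column of the workflow table; nullable
# columns omitted here are filled with None by _generate_mappings
db.insert(table='workflow', messages=[{
    'run_id': 'run-000',
    'time_began': datetime.now(),
    'host': 'login01',
    'user': 'alice',
    'rundir': '/home/alice/runinfo/000',
    'tasks_failed_count': 0,
    'tasks_completed_count': 0,
}])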
Example 7
    def __init__(self,
                 vm_reference,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 worker_init='',
                 location='westus',
                 group_name='parsl.group',
                 key_name=None,
                 key_file=None,
                 vnet_name="parsl.vnet",
                 linger=False,
                 launcher=SingleNodeLauncher()):
        if not _api_enabled:
            raise OptionalModuleMissing(
                ['azure', 'msrestazure'],
                "Azure Provider requires the azure module.")

        self._label = 'azure'
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.max_nodes = max_blocks
        self.parallelism = parallelism
        self.nodes_per_block = 1

        self.worker_init = worker_init
        self.vm_reference = vm_reference
        self.region = location
        self.vnet_name = vnet_name

        self.key_name = key_name
        self.key_file = key_file
        self.location = location
        self.group_name = group_name

        self.launcher = launcher
        self.linger = linger
        self.resources = {}
        self.instances = []

        env_specified = os.getenv("AZURE_CLIENT_ID") is not None and os.getenv(
            "AZURE_CLIENT_SECRET") is not None and os.getenv(
                "AZURE_TENANT_ID") is not None and os.getenv(
                    "AZURE_SUBSCRIPTION_ID") is not None

        if key_file is None and not env_specified:
            raise ConfigurationError(
                ("Must specify either: 'key_file', or the "
                 "`AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, "
                 "`AZURE_TENANT_ID`, and `AZURE_SUBSCRIPTION_ID` "
                 "environment variables."))

        if key_file is None:
            self.clientid = os.getenv("AZURE_CLIENT_ID")
            self.clientsecret = os.getenv("AZURE_CLIENT_SECRET")
            self.tenantid = os.getenv("AZURE_TENANT_ID")
            self.subid = os.getenv("AZURE_SUBSCRIPTION_ID")
        else:
            with open(key_file) as fh:
                keys = json.load(fh)
                self.clientid = keys.get("AZURE_CLIENT_ID")
                self.clientsecret = keys.get("AZURE_CLIENT_SECRET")
                self.tenantid = keys.get("AZURE_TENANT_ID")
                self.subid = keys.get("AZURE_SUBSCRIPTION_ID")

        self.get_clients()
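
A sketch of the JSON key_file that the loader above expects; all values are placeholders:

import json

with open("azure_keys.json", "w") as fh:
    json.dump({
        "AZURE_CLIENT_ID": "<client-id>",
        "AZURE_CLIENT_SECRET": "<client-secret>",
        "AZURE_TENANT_ID": "<tenant-id>",
        "AZURE_SUBSCRIPTION_ID": "<subscription-id>",
    }, fh, indent=2)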
Example 8
    def __init__(self,
                 label: str = "WorkQueueExecutor",
                 provider: ExecutionProvider = LocalProvider(),
                 working_dir: str = ".",
                 managed: bool = True,
                 project_name: Optional[str] = None,
                 project_password_file: Optional[str] = None,
                 address: Optional[str] = None,
                 port: int = WORK_QUEUE_DEFAULT_PORT,
                 env: Optional[Dict] = None,
                 shared_fs: bool = False,
                 storage_access: Optional[List[Staging]] = None,
                 use_cache: bool = False,
                 source: bool = False,
                 pack: bool = False,
                 extra_pkgs: Optional[List[str]] = None,
                 autolabel: bool = False,
                 autolabel_window: int = 1,
                 autocategory: bool = True,
                 max_retries: Optional[int] = 1,
                 init_command: str = "",
                 worker_options: str = "",
                 full_debug: bool = True,
                 worker_executable: str = 'work_queue_worker'):
        NoStatusHandlingExecutor.__init__(self)
        self._provider = provider
        self._scaling_enabled = True

        if not _work_queue_enabled:
            raise OptionalModuleMissing(
                ['work_queue'],
                "WorkQueueExecutor requires the work_queue module.")

        self.label = label
        self.managed = managed
        self.task_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
        self.collector_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
        self.blocks = {}  # type: Dict[str, str]
        self.address = address
        self.port = port
        self.task_counter = -1
        self.project_name = project_name
        self.project_password_file = project_password_file
        self.env = env
        self.init_command = init_command
        self.shared_fs = shared_fs
        self.storage_access = storage_access
        self.use_cache = use_cache
        self.working_dir = working_dir
        self.registered_files = set()  # type: Set[str]
        self.full_debug = full_debug
        self.source = True if pack else source
        self.pack = pack
        self.extra_pkgs = extra_pkgs or []
        self.autolabel = autolabel
        self.autolabel_window = autolabel_window
        self.autocategory = autocategory
        self.max_retries = max_retries
        self.should_stop = multiprocessing.Value(c_bool, False)
        self.cached_envs = {}  # type: Dict[int, str]
        self.worker_options = worker_options
        self.worker_executable = worker_executable

        if not self.address:
            self.address = socket.gethostname()

        if self.project_password_file is not None and not os.path.exists(
                self.project_password_file):
            raise WorkQueueFailure('Could not find password file: {}'.format(
                self.project_password_file))

        # Build foundations of the launch command
        self.launch_cmd = (
            "{package_prefix}python3 exec_parsl_function.py {mapping} {function} {result}"
        )
        if self.init_command != "":
            self.launch_cmd = self.init_command + "; " + self.launch_cmd
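
Illustrative only: with a hypothetical init_command, the composed command becomes the init command followed by the worker invocation template:

init_command = "source ~/setup_wq_env.sh"
launch_cmd = ("{package_prefix}python3 exec_parsl_function.py "
              "{mapping} {function} {result}")
print(init_command + "; " + launch_cmd)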
Example 9
    def __init__(self,
                 image_id,
                 key_name,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 nodes_per_block=1,
                 parallelism=1,
                 worker_init='',
                 instance_type='t2.small',
                 region='us-east-2',
                 spot_max_bid=0,
                 key_file=None,
                 profile=None,
                 iam_instance_profile_arn='',
                 state_file=None,
                 walltime="01:00:00",
                 linger=False,
                 launcher=SingleNodeLauncher()):
        if not _boto_enabled:
            raise OptionalModuleMissing(
                ['boto3'], "AWS Provider requires the boto3 module.")

        self.image_id = image_id
        self._label = 'ec2'
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.nodes_per_block = nodes_per_block
        self.max_nodes = max_blocks * nodes_per_block
        self.parallelism = parallelism

        self.worker_init = worker_init
        self.instance_type = instance_type
        self.region = region
        self.spot_max_bid = spot_max_bid

        self.key_name = key_name
        self.key_file = key_file
        self.profile = profile
        self.iam_instance_profile_arn = iam_instance_profile_arn

        self.walltime = walltime
        self.launcher = launcher
        self.linger = linger
        self.resources = {}
        self.state_file = state_file if state_file is not None else 'awsproviderstate.json'

        env_specified = os.getenv(
            "AWS_ACCESS_KEY_ID") is not None and os.getenv(
                "AWS_SECRET_ACCESS_KEY") is not None
        if profile is None and key_file is None and not env_specified:
            raise ConfigurationError(
                "Must specify either profile', 'key_file', or "
                "'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' environment variables."
            )

        try:
            self.initialize_boto_client()
        except Exception as e:
            logger.error("{} failed to initialize.".format(self))
            raise e

        state_file_exists = False
        try:
            self.read_state_file(self.state_file)
            state_file_exists = True
        except Exception:
            logger.info(
                "No state file found. Cannot load previous options. Creating new infrastructure."
            )

        if not state_file_exists:
            try:
                self.create_vpc()
            except Exception as e:
                logger.info(
                    "Failed to create ec2 infrastructure: {0}".format(e))
                raise
            else:
                self.write_state_file()
Example 10
class Database:

    if not _sqlalchemy_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy'],
            ("Default database logging requires the sqlalchemy library."
             " Enable monitoring support with: pip install 'parsl[monitoring]'"
             ))
    Base = declarative_base()

    def __init__(
        self,
        url: str = 'sqlite:///runinfomonitoring.db',
    ):

        self.eng = sa.create_engine(url)
        self.meta = self.Base.metadata

        # TODO: this code wants a read lock on the sqlite3 database, and fails if it cannot
        # - for example, if someone else is querying the database at the point that the
        # monitoring system is initialized. See PR #1917 for related locked-for-read fixes
        # elsewhere in this file.
        self.meta.create_all(self.eng)

        self.meta.reflect(bind=self.eng)

        Session = sessionmaker(bind=self.eng)
        self.session = Session()

    def _get_mapper(self, table_obj: Table) -> Mapper:
        if hasattr(mapperlib, '_all_registries'):
            all_mappers = set()
            for mapper_registry in mapperlib._all_registries():  # type: ignore
                all_mappers.update(mapper_registry.mappers)
        else:  # SQLAlchemy <1.4
            all_mappers = mapperlib._mapper_registry  # type: ignore
        mapper_gen = (mapper for mapper in all_mappers
                      if table_obj in mapper.tables)
        try:
            mapper = next(mapper_gen)
            second_mapper = next(mapper_gen, False)
        except StopIteration:
            raise ValueError(f"Could not get mapper for table {table_obj}")

        if second_mapper:
            raise ValueError(f"Multiple mappers for table {table_obj}")
        return mapper

    def update(self, *, table: str, columns: List[str],
               messages: List[MonitoringMessage]) -> None:
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj,
                                           columns=columns,
                                           messages=messages)
        mapper = self._get_mapper(table_obj)
        self.session.bulk_update_mappings(mapper, mappings)
        self.session.commit()

    def insert(self, *, table: str, messages: List[MonitoringMessage]) -> None:
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, messages=messages)
        mapper = self._get_mapper(table_obj)
        self.session.bulk_insert_mappings(mapper, mappings)
        self.session.commit()

    def rollback(self) -> None:
        self.session.rollback()

    def _generate_mappings(
            self,
            table: Table,
            columns: Optional[List[str]] = None,
            messages: List[MonitoringMessage] = []) -> List[Dict[str, Any]]:
        mappings = []
        for msg in messages:
            m = {}
            if columns is None:
                columns = table.c.keys()
            for column in columns:
                m[column] = msg.get(column, None)
            mappings.append(m)
        return mappings

    class Workflow(Base):
        __tablename__ = WORKFLOW
        run_id = Column(Text, nullable=False, primary_key=True)
        workflow_name = Column(Text, nullable=True)
        workflow_version = Column(Text, nullable=True)
        time_began = Column(DateTime, nullable=False)
        time_completed = Column(DateTime, nullable=True)
        host = Column(Text, nullable=False)
        user = Column(Text, nullable=False)
        rundir = Column(Text, nullable=False)
        tasks_failed_count = Column(Integer, nullable=False)
        tasks_completed_count = Column(Integer, nullable=False)

    class Status(Base):
        __tablename__ = STATUS
        task_id = Column(Integer,
                         sa.ForeignKey('task.task_id'),
                         nullable=False)
        task_status_name = Column(Text, nullable=False)
        timestamp = Column(DateTime, nullable=False)
        run_id = Column(Text, sa.ForeignKey('workflow.run_id'), nullable=False)
        try_id = Column('try_id', Integer, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id',
                                               'task_status_name',
                                               'timestamp'), )

    class Task(Base):
        __tablename__ = TASK
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)
        task_depends = Column('task_depends', Text, nullable=True)
        task_func_name = Column('task_func_name', Text, nullable=False)
        task_memoize = Column('task_memoize', Text, nullable=False)
        task_hashsum = Column('task_hashsum', Text, nullable=True, index=True)
        task_inputs = Column('task_inputs', Text, nullable=True)
        task_outputs = Column('task_outputs', Text, nullable=True)
        task_stdin = Column('task_stdin', Text, nullable=True)
        task_stdout = Column('task_stdout', Text, nullable=True)
        task_stderr = Column('task_stderr', Text, nullable=True)

        task_time_invoked = Column('task_time_invoked',
                                   DateTime,
                                   nullable=True)

        task_time_returned = Column('task_time_returned',
                                    DateTime,
                                    nullable=True)

        task_fail_count = Column('task_fail_count', Integer, nullable=False)
        task_fail_cost = Column('task_fail_cost', Float, nullable=False)

        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id'), )

    class Try(Base):
        __tablename__ = TRY
        try_id = Column('try_id', Integer, nullable=False)
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)

        block_id = Column('block_id', Text, nullable=True)
        hostname = Column('hostname', Text, nullable=True)

        task_executor = Column('task_executor', Text, nullable=False)

        task_try_time_launched = Column('task_try_time_launched',
                                        DateTime,
                                        nullable=True)

        task_try_time_running = Column('task_try_time_running',
                                       DateTime,
                                       nullable=True)

        task_try_time_returned = Column('task_try_time_returned',
                                        DateTime,
                                        nullable=True)

        task_fail_history = Column('task_fail_history', Text, nullable=True)

        task_joins = Column('task_joins', Text, nullable=True)

        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id',
                                               'run_id'), )

    class Node(Base):
        __tablename__ = NODE
        id = Column('id',
                    Integer,
                    nullable=False,
                    primary_key=True,
                    autoincrement=True)
        run_id = Column('run_id', Text, nullable=False)
        hostname = Column('hostname', Text, nullable=False)
        uid = Column('uid', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        cpu_count = Column('cpu_count', Integer, nullable=False)
        total_memory = Column('total_memory', Integer, nullable=False)
        active = Column('active', Boolean, nullable=False)
        worker_count = Column('worker_count', Integer, nullable=False)
        python_v = Column('python_v', Text, nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        last_heartbeat = Column('last_heartbeat', DateTime, nullable=False)

    class Block(Base):
        __tablename__ = BLOCK
        run_id = Column('run_id', Text, nullable=False)
        executor_label = Column('executor_label', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        job_id = Column('job_id', Text, nullable=True)
        timestamp = Column('timestamp', DateTime, nullable=False)
        status = Column("status", Text, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('run_id', 'block_id',
                                               'executor_label',
                                               'timestamp'), )

    class Resource(Base):
        __tablename__ = RESOURCE
        try_id = Column('try_id',
                        Integer,
                        sa.ForeignKey('try.try_id'),
                        nullable=False)
        task_id = Column('task_id',
                         Integer,
                         sa.ForeignKey('task.task_id'),
                         nullable=False)
        run_id = Column('run_id',
                        Text,
                        sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        resource_monitoring_interval = Column('resource_monitoring_interval',
                                              Float,
                                              nullable=True)
        psutil_process_pid = Column('psutil_process_pid',
                                    Integer,
                                    nullable=True)
        psutil_process_memory_percent = Column('psutil_process_memory_percent',
                                               Float,
                                               nullable=True)
        psutil_process_children_count = Column('psutil_process_children_count',
                                               Float,
                                               nullable=True)
        psutil_process_time_user = Column('psutil_process_time_user',
                                          Float,
                                          nullable=True)
        psutil_process_time_system = Column('psutil_process_time_system',
                                            Float,
                                            nullable=True)
        psutil_process_memory_virtual = Column('psutil_process_memory_virtual',
                                               Float,
                                               nullable=True)
        psutil_process_memory_resident = Column(
            'psutil_process_memory_resident', Float, nullable=True)
        psutil_process_disk_read = Column('psutil_process_disk_read',
                                          Float,
                                          nullable=True)
        psutil_process_disk_write = Column('psutil_process_disk_write',
                                           Float,
                                           nullable=True)
        psutil_process_status = Column('psutil_process_status',
                                       Text,
                                       nullable=True)
        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id', 'run_id',
                                               'timestamp'), )
Example 11
    def __init__(
        self,
        image: str,
        namespace: str = "default",
        nodes_per_block: int = 1,
        init_blocks: int = 0,
        min_blocks: int = 0,
        max_blocks: int = 10,
        max_cpu: float = 2,
        max_mem: str = "500Mi",
        init_cpu: float = 1,
        init_mem: str = "250Mi",
        parallelism: float = 1,
        worker_init: str = "",
        pod_name: Optional[str] = None,
        user_id: Optional[str] = None,
        group_id: Optional[str] = None,
        run_as_non_root: bool = False,
        secret: Optional[str] = None,
        incluster_config: Optional[bool] = True,
        persistent_volumes: Optional[List[Tuple[str, str]]] = None,
    ) -> None:
        if persistent_volumes is None:
            persistent_volumes = []
        if not _kubernetes_enabled:
            raise OptionalModuleMissing(
                ["kubernetes"],
                "Kubernetes provider requires kubernetes module and config.",
            )
        if incluster_config:
            config.load_incluster_config()
        else:
            config.load_kube_config()

        self.namespace = namespace
        self.image = image
        self.nodes_per_block = nodes_per_block
        self.init_blocks = init_blocks

        # The Kubernetes provider cannot know up front which pods to
        # initialize for which container, so init_blocks must be 0
        assert init_blocks == 0

        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.max_cpu = max_cpu
        self.max_mem = max_mem
        self.init_cpu = init_cpu
        self.init_mem = init_mem
        self.parallelism = parallelism
        self.worker_init = worker_init
        self.secret = secret
        self.incluster_config = incluster_config
        self.pod_name = pod_name
        self.user_id = user_id
        self.group_id = group_id
        self.run_as_non_root = run_as_non_root
        self.persistent_volumes = persistent_volumes

        self.kube_client = client.CoreV1Api()

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources_by_pod_name = {}
        # Dictionary that keeps track of jobs, keyed on task_type
        self.resources_by_task_type = {}
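
A hypothetical instantiation sketch; the class name KubernetesProvider is inferred from the error message above, and init_blocks must be 0 to satisfy the assert:

provider = KubernetesProvider(image="python:3.9",
                              namespace="parsl",
                              init_blocks=0,
                              incluster_config=False)  # load local kubeconfig instead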