def __init__(self,
             label="WorkQueueExecutor",
             working_dir=".",
             managed=True,
             project_name=None,
             project_password=None,
             project_password_file=None,
             port=WORK_QUEUE_DEFAULT_PORT,
             env=None,
             shared_fs=False,
             init_command="",
             full_debug=True,
             see_worker_output=False):
    """Record the executor configuration and assemble the worker launch command.

    Raises:
        OptionalModuleMissing: if ``_work_queue_enabled`` is falsy.

    Password handling: when both ``project_password`` and
    ``project_password_file`` are given, the file is discarded and a warning
    is logged. A password file path that does not exist on disk is discarded
    with a debug message.
    """
    if not _work_queue_enabled:
        raise OptionalModuleMissing(
            ['work_queue'],
            "WorkQueueExecutor requires the work_queue module.")

    # Inter-process plumbing (created in the same order as before).
    self.task_queue = multiprocessing.Queue()
    self.collector_queue = multiprocessing.Queue()
    self.cancel_value = multiprocessing.Value('i', 1)

    # Constructor arguments, stored as given.
    self.label = label
    self.managed = managed
    self.port = port
    self.project_name = project_name
    self.project_password = project_password
    self.project_password_file = project_password_file
    self.env = env
    self.init_command = init_command
    self.shared_fs = shared_fs
    self.working_dir = working_dir
    self.worker_output = see_worker_output
    self.full = full_debug

    # Bookkeeping that starts out empty.
    self.tasks = {}
    self.used_names = {}
    self.shared_files = set()
    self.registered_files = set()
    self.task_counter = -1
    self.scaling_enabled = False

    # Decide whether the password file survives: explicit password text
    # wins over a file, and a non-existent file is silently dropped.
    if self.project_password_file is not None:
        if self.project_password is not None:
            logger.warning(
                "Password File and Password text specified for WorkQueue Executor, only Password Text will be used"
            )
            self.project_password_file = None
        elif not os.path.exists(self.project_password_file):
            logger.debug("Password File does not exist, no file used")
            self.project_password_file = None

    # Assemble the launch command; the {placeholders} are left unformatted
    # here for later substitution.
    command = "python3 workqueue_worker.py -i {input_file} -o {output_file} {remapping_string}"
    if self.shared_fs is True:
        command = command + " --shared-fs"
    if self.init_command != "":
        command = self.init_command + "; " + command
    self.launch_cmd = command
def __init__(self,
             image: str,
             namespace: str = 'default',
             nodes_per_block: int = 1,
             init_blocks: int = 4,
             min_blocks: int = 0,
             max_blocks: int = 10,
             max_cpu: float = 2,
             max_mem: str = "500Mi",
             init_cpu: float = 1,
             init_mem: str = "250Mi",
             parallelism: float = 1,
             worker_init: str = "",
             pod_name: Optional[str] = None,
             user_id: Optional[str] = None,
             group_id: Optional[str] = None,
             run_as_non_root: bool = False,
             secret: Optional[str] = None,
             incluster_config: Optional[bool] = True,
             persistent_volumes: List[Tuple[str, str]] = []) -> None:
    """Load the Kubernetes configuration and record the provider settings.

    Every argument is stored on the instance under the same name. When
    ``incluster_config`` is truthy the in-cluster configuration is loaded
    via ``config.load_incluster_config()``; otherwise
    ``config.load_kube_config()`` is used. A ``client.CoreV1Api`` object is
    created and kept as ``self.kube_client``.

    Raises:
        OptionalModuleMissing: if ``_kubernetes_enabled`` is falsy.
    """
    if not _kubernetes_enabled:
        raise OptionalModuleMissing(
            ['kubernetes'],
            "Kubernetes provider requires kubernetes module and config.")

    if incluster_config:
        config.load_incluster_config()
    else:
        config.load_kube_config()

    self.namespace = namespace
    self.image = image
    self.nodes_per_block = nodes_per_block
    self.init_blocks = init_blocks
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.max_cpu = max_cpu
    self.max_mem = max_mem
    self.init_cpu = init_cpu
    self.init_mem = init_mem
    self.parallelism = parallelism
    self.worker_init = worker_init
    self.secret = secret
    self.incluster_config = incluster_config
    self.pod_name = pod_name
    self.user_id = user_id
    self.group_id = group_id
    self.run_as_non_root = run_as_non_root
    # Store a copy: the default ``[]`` is a single object created once at
    # definition time, so keeping a reference to it would make every
    # provider built without this argument share (and potentially mutate)
    # the same list. The signature default is left untouched for callers
    # and any introspection that compares attributes against defaults.
    self.persistent_volumes = list(persistent_volumes)

    self.kube_client = client.CoreV1Api()

    # Dictionary that keeps track of jobs. The attribute name suggests the
    # keys are pod names (the previous comment said job_id) - TODO confirm
    # against the methods that populate it.
    self.resources_by_pod_name = {}
    # Dictionary that keeps track of jobs, keyed on task_type
    self.resources_by_task_type = {}
def __init__(self,
             image_id,
             key_name,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             nodes_per_block=1,
             parallelism=1,
             worker_init='',
             instance_type='t2.small',
             region='us-east-2',
             spot_max_bid=0,
             key_file=None,
             profile=None,
             iam_instance_profile_arn='',
             state_file=None,
             walltime="01:00:00",
             linger=False,
             launcher=SingleNodeLauncher()):
    """Record the EC2 settings, create the boto client and load or create state.

    Credentials must come from one of: ``profile``, ``key_file``, or the
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment
    variables (both set).

    After ``initialize_boto_client()`` succeeds, ``read_state_file`` is
    attempted on ``self.state_file``. If that raises, ``create_vpc()`` is
    called and, on success, ``write_state_file()``.

    Raises:
        OptionalModuleMissing: if ``_boto_enabled`` is falsy.
        ConfigurationError: if no credential source is available.
        Exception: whatever ``initialize_boto_client`` or ``create_vpc``
            raise is logged and re-raised.
    """
    if not _boto_enabled:
        raise OptionalModuleMissing(
            ['boto3'], "AWS Provider requires the boto3 module.")

    self.image_id = image_id
    self._label = 'ec2'
    self.init_blocks = init_blocks
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.nodes_per_block = nodes_per_block
    self.max_nodes = max_blocks * nodes_per_block
    self.parallelism = parallelism

    self.worker_init = worker_init
    self.instance_type = instance_type
    self.region = region
    self.spot_max_bid = spot_max_bid

    self.key_name = key_name
    self.key_file = key_file
    self.profile = profile
    self.iam_instance_profile_arn = iam_instance_profile_arn

    self.walltime = walltime
    self.launcher = launcher
    self.linger = linger
    self.resources = {}
    # NOTE(review): reads ``self.label`` although only ``self._label`` is
    # assigned here - presumably a property defined elsewhere on the class.
    self.state_file = state_file if state_file is not None else '.ec2_{}.json'.format(
        self.label)

    env_specified = (os.getenv("AWS_ACCESS_KEY_ID") is not None
                     and os.getenv("AWS_SECRET_ACCESS_KEY") is not None)
    if profile is None and key_file is None and not env_specified:
        # Message fixed: the opening quote before ``profile`` was missing.
        raise ConfigurationError(
            "Must specify either 'profile', 'key_file', or "
            "'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' environment variables."
        )

    try:
        self.initialize_boto_client()
    except Exception:
        logger.error("{} failed to initialize.".format(self))
        raise

    # Any failure to read the state file is treated as "no previous state".
    try:
        self.read_state_file(self.state_file)
    except Exception:
        logger.info(
            "No state file found. Cannot load previous options. Creating new infrastructure."
        )
        state_file_exists = False
    else:
        state_file_exists = True

    if not state_file_exists:
        try:
            # The ``.id`` access is kept from the original: it makes a
            # ``create_vpc`` result without an ``id`` fail here, inside the
            # logged try block, before any state is written.
            self.create_vpc().id
        except Exception as e:
            logger.info(
                "Failed to create ec2 infrastructure: {0}".format(e))
            raise
        else:
            self.write_state_file()
def __init__(self,
             label: str = "WorkQueueExecutor",
             provider: ExecutionProvider = LocalProvider(),
             working_dir: str = ".",
             managed: bool = True,
             project_name: Optional[str] = None,
             project_password_file: Optional[str] = None,
             address: Optional[str] = None,
             port: int = WORK_QUEUE_DEFAULT_PORT,
             env: Optional[Dict] = None,
             shared_fs: bool = False,
             storage_access: Optional[List[Staging]] = None,
             use_cache: bool = False,
             source: bool = False,
             pack: bool = False,
             autolabel: bool = False,
             autolabel_window: int = 1,
             autocategory: bool = False,
             init_command: str = "",
             full_debug: bool = True):
    """Record the executor configuration and assemble the launch command.

    ``NoStatusHandlingExecutor.__init__`` is invoked first. Arguments are
    then stored on the instance; ``source`` is forced to ``True`` whenever
    ``pack`` is truthy, and a falsy ``address`` is replaced by
    ``socket.gethostname()``.

    Raises:
        OptionalModuleMissing: if ``_work_queue_enabled`` is falsy.
        WorkQueueFailure: if ``project_password_file`` is given but the
            path does not exist.
    """
    NoStatusHandlingExecutor.__init__(self)
    self._provider = provider
    self._scaling_enabled = True

    if not _work_queue_enabled:
        raise OptionalModuleMissing(
            ['work_queue'],
            "WorkQueueExecutor requires the work_queue module.")

    self.label = label
    self.managed = managed
    self.task_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
    self.collector_queue = multiprocessing.Queue()  # type: multiprocessing.Queue
    self.blocks = {}  # type: Dict[str, str]
    self.address = address
    self.port = port
    self.task_counter = -1
    self.project_name = project_name
    self.project_password_file = project_password_file
    self.env = env
    self.init_command = init_command
    self.shared_fs = shared_fs
    self.storage_access = storage_access
    self.use_cache = use_cache
    self.working_dir = working_dir
    self.registered_files = set()  # type: Set[str]
    self.full = full_debug
    # Packing implies source mode regardless of the ``source`` argument.
    self.source = True if pack else source
    self.pack = pack
    self.autolabel = autolabel
    self.autolabel_window = autolabel_window
    self.autocategory = autocategory
    self.should_stop = multiprocessing.Value(c_bool, False)
    self.cached_envs = {}  # type: Dict[int, str]

    if not self.address:
        self.address = socket.gethostname()

    # A configured but missing password file is a hard error. The former
    # follow-up branch that logged "Password File does not exist" and reset
    # the attribute to None could never run after this check, so it has
    # been removed.
    if self.project_password_file is not None and not os.path.exists(
            self.project_password_file):
        raise WorkQueueFailure('Could not find password file: {}'.format(
            self.project_password_file))

    # Build foundations of the launch command; the {placeholders} are left
    # unformatted here for later substitution.
    self.launch_cmd = (
        "{package_prefix}python3 exec_parsl_function.py {mapping} {function} {result}"
    )
    if self.init_command != "":
        self.launch_cmd = self.init_command + "; " + self.launch_cmd
def __init__(self,
             vm_reference,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             worker_init='',
             location='westus',
             group_name='parsl.auto',
             key_name=None,
             key_file=None,
             vnet_name="parsl.auto",
             linger=False,
             launcher=SingleNodeLauncher()):
    """Record the Azure settings, resolve credentials and create the clients.

    Credentials are taken from the JSON file ``key_file`` when given,
    otherwise from the ``AZURE_CLIENT_ID``, ``AZURE_CLIENT_SECRET``,
    ``AZURE_TENANT_ID`` and ``AZURE_SUBSCRIPTION_ID`` environment
    variables, all four of which must then be set. ``self.get_clients()``
    is called last.

    Raises:
        OptionalModuleMissing: if ``_api_enabled`` is falsy.
        ConfigurationError: if ``key_file`` is None and any of the four
            environment variables is unset.
    """
    if not _api_enabled:
        raise OptionalModuleMissing(
            ['azure', 'msrestazure'],
            "Azure Provider requires the azure module.")

    self._label = 'azure'
    self.init_blocks = init_blocks
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.max_nodes = max_blocks
    self.parallelism = parallelism
    self.nodes_per_block = 1

    self.worker_init = worker_init
    self.vm_reference = vm_reference
    self.region = location
    self.vnet_name = vnet_name

    self.key_name = key_name
    self.key_file = key_file
    self.location = location
    self.group_name = group_name

    self.launcher = launcher
    self.linger = linger
    self.resources = {}
    self.instances = []

    credential_vars = ("AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET",
                       "AZURE_TENANT_ID", "AZURE_SUBSCRIPTION_ID")
    env_specified = all(
        os.getenv(name) is not None for name in credential_vars)

    if key_file is None and not env_specified:
        # Message fixed: the old literal carried stray backslash
        # continuations and did not mention AZURE_SUBSCRIPTION_ID, which
        # the check above also requires.
        raise ConfigurationError(
            "Must specify either 'key_file', or the "
            "`AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, `AZURE_TENANT_ID`, "
            "and `AZURE_SUBSCRIPTION_ID` environment variables.")

    if key_file is None:
        self.clientid = os.getenv("AZURE_CLIENT_ID")
        self.clientsecret = os.getenv("AZURE_CLIENT_SECRET")
        self.tenantid = os.getenv("AZURE_TENANT_ID")
        self.subid = os.getenv("AZURE_SUBSCRIPTION_ID")
    else:
        with open(key_file) as fh:
            keys = json.load(fh)

        # Keys absent from the file yield None here (``dict.get``).
        self.clientid = keys.get("AZURE_CLIENT_ID")
        self.clientsecret = keys.get("AZURE_CLIENT_SECRET")
        self.tenantid = keys.get("AZURE_TENANT_ID")
        self.subid = keys.get("AZURE_SUBSCRIPTION_ID")

    self.get_clients()
def __init__(self, config, poolname):
    """Create the nova client and read the pool settings from ``config``.

    Args:
        config: mapping indexed as ``config[section][key]``; the sections
            ``'sites.jetstream'`` and ``'sites.jetstream.<poolname>'`` are
            read.
        poolname: suffix selecting the pool-specific config section.

    Raises:
        OptionalModuleMissing: if ``_nova_enabled`` is falsy.
        Exception: failures while reading the controller file (other than
            it being absent), listing servers, or selecting the flavor are
            logged and re-raised.

    If the controller file is absent, an error is logged and ``exit(-1)``
    is called.
    """
    self.config = config
    self.blocks = {}
    self.pool = poolname
    controller_file = "~/.ipython/profile_default/security/ipcontroller-engine.json"

    if not _nova_enabled:
        raise OptionalModuleMissing(
            ['python-novaclient'],
            "Jetstream Provider requires the python-novaclient module.")

    site = config['sites.jetstream']
    pool_section = 'sites.jetstream.{0}'.format(poolname)

    self.client = client.Client(
        api_versions.APIVersion("2.0"),
        site['OS_USERNAME'],
        site['OS_PASSWORD'],
        project_id=site['OS_PROJECT_ID'],
        project_name=site['OS_PROJECT_NAME'],
        auth_url=site['OS_AUTH_URL'],
        insecure=False,
        region_name=site['OS_REGION_NAME'],
        user_domain_name=site['OS_USER_DOMAIN_NAME'])

    api_version = api_versions.get_api_version("2.0")
    api_version = api_versions.discover_version(self.client, api_version)
    client.discover_extensions(api_version)

    logger.debug(self.client.has_neutron())
    self.server_manager = self.client.servers

    try:
        with open(os.path.expanduser(controller_file), 'r') as f:
            self.engine_config = f.read()
    except FileNotFoundError:
        logger.error("No controller_file found at : %s. Cannot proceed",
                     controller_file)
        exit(-1)
    except Exception:
        logger.error(
            "Caught exception while reading from the ipcontroller_engine.json"
        )
        raise

    try:
        # Check if the authentication worked by forcing a call
        self.server_manager.list()
    except Exception as e:
        logger.error("Caught exception : %s", e)
        raise

    flavors = self.client.flavors.list()

    try:
        # Look the wanted name up once instead of once per flavor.
        wanted_flavor = config[pool_section]['flavor']
        # Indexing [0] raises IndexError when no flavor matches; that is
        # logged and re-raised below like any other failure.
        self.flavor = [f for f in flavors if f.name == wanted_flavor][0]
    except Exception as e:
        # Fixed: the message previously had no %s placeholder for ``e``,
        # so the logging module reported a formatting error instead of
        # emitting this record.
        logger.error("Caught exception : %s", e)
        raise

    # ``sec_groups`` and ``nics`` are stored in the config as Python
    # literals and parsed with ast.literal_eval.
    self.sec_groups = ast.literal_eval(config[pool_section]['sec_groups'])
    self.nics = ast.literal_eval(config[pool_section]['nics'])
class Database:
    """SQLAlchemy session wrapper plus the declarative table models.

    The optional-dependency checks below run when the class body is
    executed, so defining this class raises ``OptionalModuleMissing`` if
    ``_sqlalchemy_enabled`` or ``_sqlalchemy_utils_enabled`` is falsy.
    """

    if not _sqlalchemy_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy'],
            ("Default database logging requires the sqlalchemy library."
             " Enable monitoring support with: pip install parsl[monitoring]"))
    if not _sqlalchemy_utils_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy_utils'],
            ("Default database logging requires the sqlalchemy_utils library."
             " Enable monitoring support with: pip install parsl[monitoring]"))

    # Declarative base shared by every model nested in this class.
    Base = declarative_base()

    def __init__(self,
                 url: str = 'sqlite:///monitoring.db',
                 ):
        """Create the engine for ``url``, create all tables and open a session."""
        self.eng = sa.create_engine(url)
        self.meta = self.Base.metadata

        self.meta.create_all(self.eng)
        self.meta.reflect(bind=self.eng)

        Session = sessionmaker(bind=self.eng)
        self.session = Session()

    def update(self, *, table: str, columns: List[str],
               messages: List[Dict[str, Any]]) -> None:
        """Bulk-update rows of ``table`` from ``messages`` and commit.

        Only the keys named in ``columns`` are taken from each message.
        """
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, columns=columns,
                                           messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_update_mappings(mapper, mappings)
        self.session.commit()

    def insert(self, *, table: str, messages: List[Dict[str, Any]]) -> None:
        """Bulk-insert one row per message into ``table`` and commit."""
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_insert_mappings(mapper, mappings)
        self.session.commit()

    def rollback(self) -> None:
        """Roll back the current session transaction."""
        self.session.rollback()

    def _generate_mappings(
            self,
            table: Table,
            columns: Optional[List[str]] = None,
            messages: Optional[List[Dict[str, Any]]] = None,
    ) -> List[Dict[str, Any]]:
        """Project each message onto ``columns``.

        Args:
            table: table whose column keys are used when ``columns`` is None.
            columns: keys to extract; defaults to every column of ``table``.
            messages: source dicts; ``None`` is treated as no messages.

        Returns:
            One dict per message, containing exactly ``columns`` as keys;
            keys missing from a message map to ``None``.
        """
        if not messages:
            return []
        # Resolve the column list once; it does not depend on the message.
        if columns is None:
            columns = table.c.keys()
        return [{column: msg.get(column, None) for column in columns}
                for msg in messages]

    class Workflow(Base):
        # Model for the table named by the WORKFLOW constant.
        __tablename__ = WORKFLOW
        run_id = Column(Text, nullable=False, primary_key=True)
        workflow_name = Column(Text, nullable=True)
        workflow_version = Column(Text, nullable=True)
        time_began = Column(DateTime, nullable=False)
        time_completed = Column(DateTime, nullable=True)
        host = Column(Text, nullable=False)
        user = Column(Text, nullable=False)
        rundir = Column(Text, nullable=False)
        tasks_failed_count = Column(Integer, nullable=False)
        tasks_completed_count = Column(Integer, nullable=False)

    class Status(Base):
        # Model for the table named by the STATUS constant.
        __tablename__ = STATUS
        task_id = Column(Integer, sa.ForeignKey('task.task_id'),
                         nullable=False)
        task_status_name = Column(Text, nullable=False)
        timestamp = Column(DateTime, nullable=False)
        run_id = Column(Text, sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        try_id = Column('try_id', Integer, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id',
                                               'task_status_name',
                                               'timestamp'), )

    class Task(Base):
        # Model for the table named by the TASK constant.
        __tablename__ = TASK
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)
        task_depends = Column('task_depends', Text, nullable=True)
        task_func_name = Column('task_func_name', Text, nullable=False)
        task_memoize = Column('task_memoize', Text, nullable=False)
        task_hashsum = Column('task_hashsum', Text, nullable=True)
        task_inputs = Column('task_inputs', Text, nullable=True)
        task_outputs = Column('task_outputs', Text, nullable=True)
        task_stdin = Column('task_stdin', Text, nullable=True)
        task_stdout = Column('task_stdout', Text, nullable=True)
        task_stderr = Column('task_stderr', Text, nullable=True)

        task_time_invoked = Column('task_time_invoked', DateTime,
                                   nullable=True)

        task_time_returned = Column('task_time_returned', DateTime,
                                    nullable=True)

        task_fail_count = Column('task_fail_count', Integer, nullable=False)

        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id'), )

    class Try(Base):
        # Model for the table named by the TRY constant.
        __tablename__ = TRY
        try_id = Column('try_id', Integer, nullable=False)
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)

        hostname = Column('hostname', Text, nullable=True)

        task_executor = Column('task_executor', Text, nullable=False)

        task_try_time_launched = Column('task_try_time_launched', DateTime,
                                        nullable=True)

        task_try_time_running = Column('task_try_time_running', DateTime,
                                       nullable=True)

        task_try_time_returned = Column('task_try_time_returned', DateTime,
                                        nullable=True)

        task_fail_history = Column('task_fail_history', Text, nullable=True)

        task_joins = Column('task_joins', Text, nullable=True)

        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id',
                                               'run_id'), )

    class Node(Base):
        # Model for the table named by the NODE constant.
        __tablename__ = NODE
        id = Column('id', Integer, nullable=False, primary_key=True,
                    autoincrement=True)
        run_id = Column('run_id', Text, nullable=False)
        hostname = Column('hostname', Text, nullable=False)
        uid = Column('uid', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        cpu_count = Column('cpu_count', Integer, nullable=False)
        total_memory = Column('total_memory', Integer, nullable=False)
        active = Column('active', Boolean, nullable=False)
        worker_count = Column('worker_count', Integer, nullable=False)
        python_v = Column('python_v', Text, nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        last_heartbeat = Column('last_heartbeat', DateTime, nullable=False)

    class Block(Base):
        # Model for the table named by the BLOCK constant.
        __tablename__ = BLOCK
        run_id = Column('run_id', Text, nullable=False)
        executor_label = Column('executor_label', Text, nullable=False)
        block_id = Column('block_id', Text, nullable=False)
        job_id = Column('job_id', Text, nullable=True)
        timestamp = Column('timestamp', DateTime, nullable=False)
        status = Column("status", Text, nullable=False)
        __table_args__ = (PrimaryKeyConstraint('run_id', 'block_id',
                                               'executor_label',
                                               'timestamp'), )

    class Resource(Base):
        # Model for the table named by the RESOURCE constant.
        __tablename__ = RESOURCE
        try_id = Column('try_id', Integer, sa.ForeignKey('try.try_id'),
                        nullable=False)
        task_id = Column('task_id', Integer, sa.ForeignKey('task.task_id'),
                         nullable=False)
        run_id = Column('run_id', Text, sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        resource_monitoring_interval = Column('resource_monitoring_interval',
                                              Float, nullable=True)
        psutil_process_pid = Column('psutil_process_pid', Integer,
                                    nullable=True)
        psutil_process_cpu_percent = Column('psutil_process_cpu_percent',
                                            Float, nullable=True)
        psutil_process_memory_percent = Column('psutil_process_memory_percent',
                                               Float, nullable=True)
        psutil_process_children_count = Column('psutil_process_children_count',
                                               Float, nullable=True)
        psutil_process_time_user = Column('psutil_process_time_user', Float,
                                          nullable=True)
        psutil_process_time_system = Column('psutil_process_time_system',
                                            Float, nullable=True)
        psutil_process_memory_virtual = Column('psutil_process_memory_virtual',
                                               Float, nullable=True)
        psutil_process_memory_resident = Column(
            'psutil_process_memory_resident', Float, nullable=True)
        psutil_process_disk_read = Column('psutil_process_disk_read', Float,
                                          nullable=True)
        psutil_process_disk_write = Column('psutil_process_disk_write', Float,
                                           nullable=True)
        psutil_process_status = Column('psutil_process_status', Text,
                                       nullable=True)
        __table_args__ = (PrimaryKeyConstraint('try_id', 'task_id', 'run_id',
                                               'timestamp'), )
class Database(object):
    """SQLAlchemy session wrapper plus the declarative table models.

    The optional-dependency checks below run when the class body is
    executed, so defining this class raises ``OptionalModuleMissing`` if
    ``_sqlalchemy_enabled`` or ``_sqlalchemy_utils_enabled`` is falsy.
    """

    if not _sqlalchemy_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy'],
            ("Default database logging requires the sqlalchemy library."
             " Enable monitoring support with: pip install parsl[monitoring]"))
    if not _sqlalchemy_utils_enabled:
        raise OptionalModuleMissing(
            ['sqlalchemy_utils'],
            ("Default database logging requires the sqlalchemy_utils library."
             " Enable monitoring support with: pip install parsl[monitoring]"))

    # Declarative base shared by every model nested in this class.
    Base = declarative_base()

    def __init__(self,
                 url='sqlite:///monitoring.db',
                 username=None,
                 password=None,
                 ):
        """Create the engine for ``url``, create all tables and open a session.

        ``username`` and ``password`` are accepted but not used by this
        constructor.
        """
        self.eng = sa.create_engine(url)
        self.meta = self.Base.metadata

        self.meta.create_all(self.eng)
        self.meta.reflect(bind=self.eng)

        Session = sessionmaker(bind=self.eng)
        self.session = Session()

    def update(self, table=None, columns=None, messages=None):
        """Bulk-update rows of ``table`` from ``messages`` and commit.

        ``table`` is the table name used to index ``self.meta.tables``.
        When ``columns`` is None every column of the table is used.
        """
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, columns=columns,
                                           messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_update_mappings(mapper, mappings)
        self.session.commit()

    def insert(self, table=None, messages=None):
        """Bulk-insert one row per message into ``table`` and commit.

        ``table`` is the table name used to index ``self.meta.tables``.
        """
        table_obj = self.meta.tables[table]
        mappings = self._generate_mappings(table_obj, messages=messages)
        mapper = get_mapper(table_obj)
        self.session.bulk_insert_mappings(mapper, mappings)
        self.session.commit()

    def _generate_mappings(self, table, columns=None, messages=None):
        """Project each message onto ``columns``.

        ``update`` and ``insert`` forward their ``messages=None`` default
        explicitly, which used to bypass the old ``[]`` default here and
        fail with ``TypeError`` when iterating ``None``; ``None`` is now
        treated as "no messages".

        Returns:
            One dict per message, containing exactly ``columns`` as keys
            (every column of ``table`` when ``columns`` is None); keys
            missing from a message map to ``None``.
        """
        if not messages:
            return []
        # Resolve the column list once; it does not depend on the message.
        if columns is None:
            columns = table.c.keys()
        return [{column: msg.get(column, None) for column in columns}
                for msg in messages]

    class Workflow(Base):
        # Model for the table named by the WORKFLOW constant.
        __tablename__ = WORKFLOW
        run_id = Column(Text, nullable=False, primary_key=True)
        workflow_name = Column(Text, nullable=True)
        workflow_version = Column(Text, nullable=True)
        time_began = Column(DateTime, nullable=False)
        time_completed = Column(DateTime, nullable=True)
        workflow_duration = Column(Float, nullable=True)
        host = Column(Text, nullable=False)
        user = Column(Text, nullable=False)
        rundir = Column(Text, nullable=False)
        tasks_failed_count = Column(Integer, nullable=False)
        tasks_completed_count = Column(Integer, nullable=False)

    # TODO: expand to full set of info
    class Status(Base):
        # Model for the table named by the STATUS constant.
        __tablename__ = STATUS
        task_id = Column(Integer, sa.ForeignKey('task.task_id'),
                         nullable=False)
        task_status_name = Column(Text, nullable=False)
        timestamp = Column(DateTime, nullable=False)
        run_id = Column(Text, sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        hostname = Column('hostname', Text, nullable=True)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id',
                                               'task_status_name',
                                               'timestamp'), )

    class Task(Base):
        # Model for the table named by the TASK constant.
        __tablename__ = TASK
        task_id = Column('task_id', Integer, nullable=False)
        run_id = Column('run_id', Text, nullable=False)
        hostname = Column('hostname', Text, nullable=True)
        task_depends = Column('task_depends', Text, nullable=True)
        task_executor = Column('task_executor', Text, nullable=False)
        task_func_name = Column('task_func_name', Text, nullable=False)
        task_time_submitted = Column('task_time_submitted', DateTime,
                                     nullable=True)
        task_time_running = Column('task_time_running', DateTime,
                                   nullable=True)
        task_time_returned = Column('task_time_returned', DateTime,
                                    nullable=True)
        task_elapsed_time = Column('task_elapsed_time', Float, nullable=True)
        task_memoize = Column('task_memoize', Text, nullable=False)
        task_inputs = Column('task_inputs', Text, nullable=True)
        task_outputs = Column('task_outputs', Text, nullable=True)
        task_stdin = Column('task_stdin', Text, nullable=True)
        task_stdout = Column('task_stdout', Text, nullable=True)
        task_stderr = Column('task_stderr', Text, nullable=True)
        task_fail_count = Column('task_fail_count', Integer, nullable=False)
        task_fail_history = Column('task_fail_history', Text, nullable=True)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id'), )

    class Node(Base):
        # Model for the table named by the NODE constant.
        __tablename__ = NODE
        id = Column('id', Integer, nullable=False, primary_key=True,
                    autoincrement=True)
        run_id = Column('run_id', Text, nullable=False)
        hostname = Column('hostname', Text, nullable=False)
        cpu_count = Column('cpu_count', Integer, nullable=False)
        total_memory = Column('total_memory', Integer, nullable=False)
        active = Column('active', Boolean, nullable=False)
        worker_count = Column('worker_count', Integer, nullable=False)
        python_v = Column('python_v', Text, nullable=False)
        reg_time = Column('reg_time', DateTime, nullable=False)

    class Resource(Base):
        # Model for the table named by the RESOURCE constant.
        __tablename__ = RESOURCE
        task_id = Column('task_id', Integer, sa.ForeignKey('task.task_id'),
                         nullable=False)
        timestamp = Column('timestamp', DateTime, nullable=False)
        run_id = Column('run_id', Text, sa.ForeignKey('workflow.run_id'),
                        nullable=False)
        resource_monitoring_interval = Column('resource_monitoring_interval',
                                              Float, nullable=True)
        psutil_process_pid = Column('psutil_process_pid', Integer,
                                    nullable=True)
        psutil_process_cpu_percent = Column('psutil_process_cpu_percent',
                                            Float, nullable=True)
        psutil_process_memory_percent = Column('psutil_process_memory_percent',
                                               Float, nullable=True)
        psutil_process_children_count = Column('psutil_process_children_count',
                                               Float, nullable=True)
        psutil_process_time_user = Column('psutil_process_time_user', Float,
                                          nullable=True)
        psutil_process_time_system = Column('psutil_process_time_system',
                                            Float, nullable=True)
        psutil_process_memory_virtual = Column('psutil_process_memory_virtual',
                                               Float, nullable=True)
        psutil_process_memory_resident = Column(
            'psutil_process_memory_resident', Float, nullable=True)
        psutil_process_disk_read = Column('psutil_process_disk_read', Float,
                                          nullable=True)
        psutil_process_disk_write = Column('psutil_process_disk_write', Float,
                                           nullable=True)
        psutil_process_status = Column('psutil_process_status', Text,
                                       nullable=True)
        __table_args__ = (PrimaryKeyConstraint('task_id', 'run_id',
                                               'timestamp'), )