def _create_data_provider(self):
    """
    Build the data provider described by the "data" configuration section.

    The provider type is read from the "type" key and falls back to
    "files". For the "files" type, when no "path" is configured, a path
    inside the work path is stored into the configuration before the
    provider is created.

    :return: DataProvider
    """
    data_conf = self._conf.get("data", default=Data.element)
    provider_type = data_conf.get("type", "files")

    # Only the file based provider gets a default location
    needs_default_path = provider_type == "files" and "path" not in data_conf
    if needs_default_path:
        default_dir = "data_{}".format(provider_type)
        data_conf["path"] = os.path.join(self._work_path, default_dir)

    return data_provider_factory.create(
        data_conf, type=provider_type, logger=self._log)
def __init__(self):
    """
    Initialize the task from the command line configuration.

    The steps are, in order: read the required command line options,
    register a handler for every signal that accepts one, start the data
    provider, open the storage container of the case, load the task and
    work item descriptors, build the task configuration, initialize
    logging, create the input stream and collect the port descriptors.
    """
    # Get task key and storage configuration
    cmd_conf = OptionsConfig(
        required=["case", "task", "index", "data.type", "storage.type"])

    # Register signals
    self._signal_names = {}
    for signame in dir(signal):
        # Keep real signal names only (SIGINT, SIGTERM, ...). The signal
        # module also exposes SIG_DFL, SIG_IGN, SIG_BLOCK, SIG_UNBLOCK and
        # SIG_SETMASK: they are handler/mask constants whose small integer
        # values collide with real signal numbers and would replace the
        # right name in self._signal_names.
        if not signame.startswith("SIG") or signame.startswith("SIG_"):
            continue
        try:
            signum = getattr(signal, signame)
            signal.signal(signum, self.__signal_handler)
        except (ValueError, OSError, RuntimeError):
            # Best effort: some signals can not be caught (SIGKILL,
            # SIGSTOP) and handlers can only be set from the main thread.
            continue
        self._signal_names[signum] = signame

    # command line configuration
    case_name = cmd_conf["case"]
    task_cname = cmd_conf["task"]
    workitem_index = cmd_conf["index"]

    # initialize the data provider
    provider_conf = cmd_conf["data"]
    self._provider = data_provider_factory.create(provider_conf)
    self._provider.start()

    # initialize storage
    storage_conf = cmd_conf["storage"]
    self.storage = storage_factory.create(storage_conf)
    self.storage = self.storage.get_container(case_name)

    # load the module and task descriptors
    task_desc = self._provider.load_task(case_name, task_cname)
    workitem_desc = self._provider.load_workitem(
        case_name, task_cname, workitem_index)
    partition = workitem_desc["partition"]

    # setup task configuration
    self.conf = Data.create(task_desc["conf"])
    self.conf["__task_index"] = workitem_index
    self.conf.expand_vars()

    # setup task attributes
    self.case = workitem_desc["case"]
    self.task = workitem_desc["task"]
    self.id = workitem_desc["cname"]
    self.name = workitem_desc["name"]
    self.index = workitem_index

    # initialize decorators
    self._main = None
    self._sources = []
    self._foreach = None
    self._begin = None
    self._end = None

    self._start_time = 0
    self._end_time = self._start_time

    # initialize task logging
    log_conf = self.conf.get("logging")
    logger.initialize(log_conf)
    self.logger = logger.get_logger(self.name)

    self.logger.debug("Task descriptor: {}".format(Data.create(task_desc)))
    self.logger.debug("WorkItem descriptor: {}".format(
        Data.create(workitem_desc)))

    # Initialize input stream
    self._stream = Stream(self._provider, task_desc["stream"])

    # Initialize ports
    self._ports = {}
    self._in_ports = []
    self._out_ports = []
    self._open_ports = {}

    if "ports" in task_desc and "ports" in partition:
        # Port definitions come from the task descriptor ...
        port_descriptors = Data.create(task_desc["ports"])

        for port_desc in port_descriptors.get("in", default=list):
            port_desc["mode"] = PORT_MODE_IN
            self._ports[port_desc["name"]] = port_desc
            self._in_ports.append(port_desc)

        for port_desc in port_descriptors.get("out", default=list):
            port_desc["mode"] = PORT_MODE_OUT
            self._ports[port_desc["name"]] = port_desc
            self._out_ports.append(port_desc)

        # ... and the data of each port comes from the work item partition
        port_descriptors = Data.create(partition["ports"])

        for direction in ("in", "out"):
            for port_desc in port_descriptors.get(direction, default=list):
                task_port_desc = self._ports[port_desc["name"]]
                task_port_desc["data"] = port_desc["data"]

    # Get hostname
    try:
        import socket
        self.hostname = socket.gethostname()
    except Exception:
        # The hostname is informative only, never fail because of it
        self.hostname = "unknown"

    # The context field is free to be used by the task user to
    # save variables related with the whole task life cycle.
    # By default it is initialized with a dictionary but can be
    # overwritten with any value by the user. Wok will never use it.
    self.context = {}
def __init__(self):
    """
    Initialize the task from the command line configuration.

    The steps are, in order: read the required command line options,
    register a handler for every signal that accepts one, start the data
    provider, open the storage container of the case, load the task and
    work item descriptors, build the task configuration, initialize
    logging, create the input stream and collect the port descriptors.
    """
    # Get task key and storage configuration
    cmd_conf = OptionsConfig(
        required=["case", "task", "index", "data.type", "storage.type"])

    # Register signals
    self._signal_names = {}
    for signame in dir(signal):
        # Keep real signal names only (SIGINT, SIGTERM, ...). The signal
        # module also exposes SIG_DFL, SIG_IGN, SIG_BLOCK, SIG_UNBLOCK and
        # SIG_SETMASK: they are handler/mask constants whose small integer
        # values collide with real signal numbers and would replace the
        # right name in self._signal_names.
        if not signame.startswith("SIG") or signame.startswith("SIG_"):
            continue
        try:
            signum = getattr(signal, signame)
            signal.signal(signum, self.__signal_handler)
        except (ValueError, OSError, RuntimeError):
            # Best effort: some signals can not be caught (SIGKILL,
            # SIGSTOP) and handlers can only be set from the main thread.
            continue
        self._signal_names[signum] = signame

    # command line configuration
    case_name = cmd_conf["case"]
    task_cname = cmd_conf["task"]
    workitem_index = cmd_conf["index"]

    # initialize the data provider
    provider_conf = cmd_conf["data"]
    self._provider = data_provider_factory.create(provider_conf)
    self._provider.start()

    # initialize storage
    storage_conf = cmd_conf["storage"]
    self.storage = storage_factory.create(storage_conf)
    self.storage = self.storage.get_container(case_name)

    # load the module and task descriptors
    task_desc = self._provider.load_task(case_name, task_cname)
    workitem_desc = self._provider.load_workitem(
        case_name, task_cname, workitem_index)
    partition = workitem_desc["partition"]

    # setup task configuration
    self.conf = Data.create(task_desc["conf"])
    self.conf["__task_index"] = workitem_index
    self.conf.expand_vars()

    # setup task attributes
    self.case = workitem_desc["case"]
    self.task = workitem_desc["task"]
    self.id = workitem_desc["cname"]
    self.name = workitem_desc["name"]
    self.index = workitem_index

    # initialize decorators
    self._main = None
    self._sources = []
    self._foreach = None
    self._begin = None
    self._end = None

    self._start_time = 0
    self._end_time = self._start_time

    # initialize task logging
    log_conf = self.conf.get("logging")
    logger.initialize(log_conf)
    self.logger = logger.get_logger(self.name)

    self.logger.debug("Task descriptor: {}".format(Data.create(task_desc)))
    self.logger.debug("WorkItem descriptor: {}".format(
        Data.create(workitem_desc)))

    # Initialize input stream
    self._stream = Stream(self._provider, task_desc["stream"])

    # Initialize ports
    self._ports = {}
    self._in_ports = []
    self._out_ports = []
    self._open_ports = {}

    if "ports" in task_desc and "ports" in partition:
        # Port definitions come from the task descriptor ...
        port_descriptors = Data.create(task_desc["ports"])

        for port_desc in port_descriptors.get("in", default=list):
            port_desc["mode"] = PORT_MODE_IN
            self._ports[port_desc["name"]] = port_desc
            self._in_ports.append(port_desc)

        for port_desc in port_descriptors.get("out", default=list):
            port_desc["mode"] = PORT_MODE_OUT
            self._ports[port_desc["name"]] = port_desc
            self._out_ports.append(port_desc)

        # ... and the data of each port comes from the work item partition
        port_descriptors = Data.create(partition["ports"])

        for direction in ("in", "out"):
            for port_desc in port_descriptors.get(direction, default=list):
                task_port_desc = self._ports[port_desc["name"]]
                task_port_desc["data"] = port_desc["data"]

    # Get hostname
    try:
        import socket
        self.hostname = socket.gethostname()
    except Exception:
        # The hostname is informative only, never fail because of it
        self.hostname = "unknown"

    # The context field is free to be used by the task user to
    # save variables related with the whole task life cycle.
    # By default it is initialized with a dictionary but can be
    # overwritten with any value by the user. Wok will never use it.
    self.context = {}