def load_nodes(self, csv_path):
    """Read node definitions from a csv file.

    Every row must provide at least two columns: the node name followed by
    its ip. All remaining columns of the row are stored on the node as
    ``GROUPS``. Nodes are only built and returned here; if any row is
    malformed the whole load is aborted and no nodes are returned.

    Arguments:
        csv_path {str} -- csv path

    Returns:
        list -- one BaseNode per csv row, in file order

    Raises:
        RuntimeError -- if a row defines fewer than two columns
    """
    with open(csv_path, newline='') as f:
        rows = list(csv.reader(f, delimiter=",", quotechar='"'))

    all_nodes = []
    for n, row in enumerate(rows):
        # Only the column lookups can signal a short row, so only they are
        # guarded; this keeps the error message accurate.
        try:
            name = row[0].strip()
            ip = row[1].strip()
        except IndexError as err:
            raise RuntimeError(
                f"Row {n} in file {csv_path} must at least define column 'name' and 'ip'"
            ) from err

        node = BaseNode(ip, name=name)
        # Slice the csv row, not the node: the row and the node used to share
        # one variable name, so the groups were read from the BaseNode itself.
        node.GROUPS = row[2:]
        all_nodes.append(node)
    return all_nodes
# Jobs to be executed within the network.
# Each entry can be an instance of BaseJob (or any of its subclasses),
# or a path to a csv file defining jobs (see documentation).
JOBS = [
    SourceJob(outputs=[IntPipe(1)]),
    MiddleJob(inputs=[IntPipe(1)], outputs=[IntPipe(2)]),
    SumJob(inputs=[IntPipe(2)]),
]

# Nodes to be included in the network.
# Each entry can be an instance of BaseNode or Multinode,
# or a path to a csv file defining nodes (see documentation).
NODES = [
    BaseNode("192.168.4.3"),
    BaseNode("192.168.4.2"),
    BaseNode("192.168.4.4"),
    BaseNode("192.168.4.5"),
]

# Optional XML file defining the whole network or a part of it.
# This is not recommended and only exists for specific use cases.
XML_FILE = None
],
    ),
    normfn(
        inputs=[
            PicklePipe(7),
        ],
        outputs=[
            PicklePipe(8),
        ],
    ),
    cov2svd(
        inputs=[
            PicklePipe(8),
            PicklePipe(9),
        ],
    ),
]

# Nodes to be included in the network.
# Each entry can be an instance of BaseNode or Multinode,
# or a path to a csv file defining nodes (see documentation).
NODES = [
    BaseNode("192.168.0.30"),
    BaseNode("192.168.0.29"),
    # BaseNode("192.168.0.28"),
]

# Optional XML file defining the whole network or a part of it.
# This is not recommended and only exists for specific use cases.
XML_FILE = None
PicklePipe(4),
        ],
        outputs=[
            PicklePipe(14),
        ]
    ),
    CSVSinkJob_2(
        path="file.csv",
        inputs=[
            PicklePipe(11),
            PicklePipe(12),
            PicklePipe(13),
            PicklePipe(14),
        ]
    )
]

# Nodes to be included in the network.
# Each entry can be an instance of BaseNode or Multinode,
# or a path to a csv file defining nodes (see documentation).
# NOTE(review): Node1 / Node2 are not defined in this excerpt -- presumably
# project-specific node classes declared earlier in the file; confirm.
NODES = [
    Node2("192.168.0.30"),
    BaseNode("192.168.0.29"),
    Node1("192.168.0.28"),
    Node1("192.168.0.27"),
]

# Optional XML file defining the whole network or a part of it.
# This is not recommended and only exists for specific use cases.
XML_FILE = None
def load(self, xml_file):
    """Read an XML file and generate the according network.

    Each child of the XML root describes one node (its ip is read from the
    ``self.ip`` attribute). Each child of a node element describes one job:
    the executable (``self.executable``) and its path
    (``self.dependencies``) are read from attributes, and the job's own
    children are sorted by tag into input pipes (``self.input``), output
    pipes (``self.output``) and parameters (``self.parameter_tag``).

    Arguments:
        xml_file {str} -- path to xml file

    Returns:
        tuple -- ``(nodes, jobs)``: the BaseNode objects in document order,
            each already holding its jobs, and a flat list of every
            DynamicMARVELOJob that was created
    """
    def build_pipe(atom):
        """Create a LoaderPipe from an input/output element."""
        pipe = LoaderPipe(int(atom.get(self.pipe_id)))
        # A missing or empty size/count attribute falls back to 1.
        size = atom.get(self.block_size)
        pipe.block_size = int(size) if size else 1
        count = atom.get(self.block_count)
        pipe.block_count = int(count) if count else 1
        return pipe

    print(f"Reading XML file from {xml_file}")
    # Hand lxml the raw bytes: it refuses str input that carries an XML
    # encoding declaration, and bytes let it honour the declared encoding.
    with open(xml_file, "rb") as f:
        root = objectify.fromstring(f.read())

    jobs = []
    nodes = []
    # getchildren() is kept on purpose: iterating an objectify element
    # directly walks same-tag siblings, not its children.
    for node_element in root.getchildren():
        ip = node_element.get(self.ip)
        job_buffer = []

        for p in node_element.getchildren():
            inputs = []
            outputs = []
            parameters = []
            # name of the executable and the path it is started from
            executable = p.get(self.executable)
            path = p.get(self.dependencies)

            # input/output pipes and additional parameters
            for atom in p.getchildren():
                if atom.tag == self.input:
                    inputs.append(build_pipe(atom))
                if atom.tag == self.output:
                    outputs.append(build_pipe(atom))
                if atom.tag == self.parameter_tag:
                    parameters.append(atom.get(self.parameter))

            job = DynamicMARVELOJob(path, executable, " ".join(parameters),
                                    inputs=inputs, outputs=outputs)
            jobs.append(job)
            job_buffer.append(job)

        node = BaseNode(ip)
        # the node must be able to hold every job the XML assigns to it
        node.max_jobs = len(job_buffer)
        node.add_jobs(job_buffer)
        nodes.append(node)

    return nodes, jobs
def status_callback(self, status, node, job):
    """Handle status callbacks from the JobCluster.

    Dispatches on ``status``, which is compared against the dispy node
    states (Initialized, Closed, AvailInfo) and the dispy job states
    (Abandoned, Finished, Created, Running, Terminated, Cancelled). Any
    other status is logged as unexpected. Nothing is done once the network
    has been shut down.

    Arguments:
        status {dispy status} -- New status (Job status or node status)
        node {dispy.DispyNode} -- Updated node
        job {dispy.DispyJob} -- Updated job
    """
    # skip execution when the network has been shut down
    if self._shutdown:
        return
    if status == dispy.DispyNode.Initialized:
        # A node that is not known yet gets a BaseNode created on the fly,
        # is added to the "ALL" group and registered under its ip address.
        if node.ip_addr not in self.nodes.keys():
            logger.info(f"Created new BaseNode for {node.ip_addr}.")
            _node = BaseNode(node.ip_addr)
            _node.max_jobs = node.cpus
            GROUPS["ALL"].add_node(_node)
            self.nodes[node.ip_addr] = _node
        logger.info(
            "Node {0.ip_addr} showed up with {0.avail_cpus} CPUs.".format(node))
        self.up(node.ip_addr)
        if self.pre_copy:
            self.send_files(nodes=[self.nodes[node.ip_addr]])
    elif status == dispy.DispyNode.Closed:
        closed_jobs = self.down(node.ip_addr)
        logger.warning(
            f"Node {node.ip_addr} closed with {closed_jobs} Jobs.")
        if self.cluster and not self.is_ready():
            # Collect the jobs connected to the closed ones: the source of
            # every input pipe and the destination of every output pipe.
            jobs = []
            for j in closed_jobs:
                for i in j.inputs:
                    jobs.append(i.source)
                for o in j.outputs:
                    jobs.append(o.destination)
            # Currently a no-op: cancelling/restarting the neighbouring
            # jobs is disabled (see the commented-out calls).
            for i in jobs:
                if i.dispy_job:
                    pass
                    # self.cluster.cancel(i.dispy_job)
                    # self.restart(i.dispy_job.id)
            # Re-allocate and re-submit; if allocation fails the network is
            # shut down and the finished event is set.
            try:
                nodes = self.allocate_jobs()
                logger.debug(f"Remaining Jobs allocated on {nodes} Nodes.")
                self.submit(self.cluster)
            except AllocationError:
                logger.error("Allocation error, shutting down...")
                self.shutdown()
                self._finished.set()
    elif status == dispy.DispyNode.AvailInfo:
        # Store the node's availability info as [cpu, memory, disk, swap].
        try:
            # logger.debug(f"Heartbeat from Node {node.ip_addr}.")
            self.nodes[node.ip_addr].info = [node.avail_info.cpu,
                                             node.avail_info.memory,
                                             node.avail_info.disk,
                                             node.avail_info.swap]
        except AttributeError:
            # An AttributeError here is reported as psutil missing on the node.
            logger.exception(
                f"Node {node.ip_addr} has no psutil installed.")
    # JOBS
    elif status == dispy.DispyJob.Abandoned:
        logger.debug(f"DispyJob {job.id} abandoned.")
    elif status == dispy.DispyJob.Finished:
        logger.info(
            f"DispyJob with ID {job.id} finished.")
        if job.result:
            logger.debug(f"Result: {job.result}")
        if job.stdout:
            logger.debug(f"Stdout: {job.stdout}")
        self.finished(job.id)
    elif status == dispy.DispyJob.Created:
        # Job creation is deliberately ignored.
        pass
        # logger.debug(f"DispyJob created with ID {job.id}")
    elif status == dispy.DispyJob.Running:
        logger.debug(f"DispyJob running with ID {job.id}")
        self.running(job.id)
    elif status == dispy.DispyJob.Terminated:
        logger.debug(
            f"Job {self._get_job_by_dispy_id(job.id)} terminated on node {job.ip_addr}")
        if job.exception:
            logger.debug(f"With error:\n{job.exception}")
        self.terminated(job.id)
    elif status == dispy.DispyJob.Cancelled:
        # Cancelled jobs go through the same handler as terminated ones.
        logger.debug(
            f"DispyJob with ID {job.id} cancelled.\nResult: {job.result}\nStdout: {job.stdout}"
        )
        self.terminated(job.id)
        # self.submit(self.cluster, self.ssh_user)
    else:
        # Unknown status: report where it came from.
        # NOTE(review): job output is only logged when no node was passed --
        # confirm this nesting matches the intended behaviour.
        if node:
            logger.warning(
                f"Unexpected callback from {node.ip_addr} with status {status}")
        else:
            logger.warning(
                f"Unexpected callback from {job.ip_addr} with status {status}")
            if job.stdout:
                logger.warning(f"Out: {job.stdout}")
            if job.exception:
                logger.warning(f"Traceback: {job.exception}")
def setUp(self):
    """Create the fixtures: two nodes and two jobs."""
    self.Node1, self.Node2 = BaseNode("192.168.4.2"), BaseNode("192.168.4.3")
    self.Job, self.Job2 = BaseJob(), BaseJob()