def go(self): self.logger.info("Starting datamapper run") super(datamapper, self).go() # We build lists of compute-nodes per cluster and data-per-cluster, # then match them up to schedule jobs in a round-robin fashion. # ---------------------------------------------------------------------- clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc")) if clusterdesc.subclusters: available_nodes = dict((cl.name, cycle(get_compute_nodes(cl))) for cl in clusterdesc.subclusters) else: available_nodes = { clusterdesc.name: cycle(get_compute_nodes(clusterdesc)) } data = defaultdict(list) for filename in self.inputs['args']: subcluster = filename.split(os.path.sep)[2] try: host = available_nodes[subcluster].next() except KeyError, key: self.logger.error("%s is not a known cluster" % str(key)) raise data[host].append(filename)
def go(self): self.logger.info("Starting datamapper run") super(datamapper, self).go() # We build lists of compute-nodes per cluster and data-per-cluster, # then match them up to schedule jobs in a round-robin fashion. # ---------------------------------------------------------------------- clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc")) if clusterdesc.subclusters: available_nodes = dict( (cl.name, cycle(get_compute_nodes(cl))) for cl in clusterdesc.subclusters ) else: available_nodes = { clusterdesc.name: cycle(get_compute_nodes(clusterdesc)) } data = defaultdict(list) for filename in self.inputs['args']: subcluster = filename.split(os.path.sep)[2] try: host = available_nodes[subcluster].next() except KeyError, key: self.logger.error("%s is not a known cluster" % str(key)) raise data[host].append(filename)
def go(self): self.logger.info("Starting thumbnail_combine run") super(thumbnail_combine, self).go() hosts = get_compute_nodes( ClusterDesc(self.config.get('cluster', "clusterdesc"))) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host in hosts: jobs.append( ComputeJob(host, command, arguments=[ self.inputs['executable'], self.inputs['file_pattern'], self.inputs['input_dir'], self.inputs['output_file'], self.inputs['clobber'] ])) self._schedule_jobs(jobs) if self.error.isSet(): self.logger.warn("Failed compute job process detected") return 1 else: return 0
def compute_nodes(configfile='~/.pipeline.cfg'):
    clusterdesc = ClusterDesc(
        _get_config(os.path.expanduser(configfile)).get('cluster', 'clusterdesc')
    )
    env.hosts = get_compute_nodes(clusterdesc)

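# Hedged usage note for compute_nodes() above: it reads like a Fabric 1.x
# host-setting task (it assigns env.hosts), so presumably it would be chained
# before other tasks on the command line so they run on the compute nodes,
# e.g.:
#
#   fab compute_nodes some_task
#
# The task name some_task is a made-up placeholder, not from the source.
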
def go(self): self.logger.info("Starting thumbnail_combine run") super(thumbnail_combine, self).go() hosts = get_compute_nodes( ClusterDesc(self.config.get('cluster', "clusterdesc")) ) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host in hosts: jobs.append( ComputeJob( host, command, arguments=[ self.inputs['executable'], self.inputs['file_pattern'], self.inputs['input_dir'], self.inputs['output_file'], self.inputs['clobber'] ] ) ) self._schedule_jobs(jobs) if self.error.isSet(): self.logger.warn("Failed compute job process detected") return 1 else: return 0
def __init__(self, config, logger=None):
    if not logger:
        logging.basicConfig()
        self.logger = logging.getLogger()
    else:
        self.logger = logger

    clusterdesc = ClusterDesc(config.get('cluster', 'clusterdesc'))
    self.head_node = get_head_node(clusterdesc)[0]
    self.compute_nodes = get_compute_nodes(clusterdesc)
    self.script_path = config.get('deploy', 'script_path')
    self.config = config

def go(self): self.logger.info("Starting datamapper run") super(datamapper, self).go() # We build lists of compute-nodes per cluster and data-per-cluster, # then match them up to schedule jobs in a round-robin fashion. # ---------------------------------------------------------------------- clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc")) if clusterdesc.subclusters: available_nodes = dict((cl.name, cycle(get_compute_nodes(cl))) for cl in clusterdesc.subclusters) else: available_nodes = { clusterdesc.name: cycle(get_compute_nodes(clusterdesc)) } data = defaultdict(list) for filename in self.inputs['args']: subcluster = filename.split(os.path.sep)[2] try: host = next(available_nodes[subcluster]) except KeyError as key: self.logger.error("%s is not a known cluster" % str(key)) raise data[host].append(filename) # Dump the generated mapping to a parset # ---------------------------------------------------------------------- parset = Parset() for host, filenames in data.items(): parset.addStringVector(host, filenames) parset.writeFile(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def get_compute_nodes(clusterdesc_file):
    """
    Read a cluster description file and return list of nodes

    Parameters
    ----------
    clusterdesc_file : str
        Filename of cluster description file

    Returns
    -------
    result : list
        Sorted list of node names
    """
    from lofarpipe.support import clusterdesc

    cluster = clusterdesc.ClusterDesc(clusterdesc_file)
    return sorted(clusterdesc.get_compute_nodes(cluster))

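# A minimal usage sketch for get_compute_nodes() above, assuming lofarpipe is
# installed and a LOFAR-style cluster description file exists at the
# hypothetical path below.
if __name__ == '__main__':
    for node in get_compute_nodes('/opt/cep/clusterdesc/cep2.clusterdesc'):
        print(node)
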
def group_files(logger, clusterdesc, node_directory, group_size, filenames):
    """
    Group a list of files into blocks suitable for simultaneous
    processing, such that a limited number of processes run on any
    given host at a time.

    node_directory is searched on every compute node specified in
    clusterdesc for any of the files listed in filenames. A generator
    is produced; on each call, no more than group_size files per node
    are returned.
    """
    # Given a limited number of processes per node, the first task is to
    # partition up the data for processing.
    logger.debug('Listing data on nodes')
    data = {}
    for node in get_compute_nodes(clusterdesc):
        logger.debug("Node: %s" % (node))
        # ssh joins its trailing arguments into a single remote command
        # string, so "find <dir> -maxdepth 1 -print0" runs on the node.
        exec_string = [
            "ssh", node, "--",
            "find", node_directory, "-maxdepth", "1", "-print0"
        ]
        logger.debug("Executing: %s" % (" ".join(exec_string)))
        my_process = subprocess.Popen(
            exec_string, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # communicate() returns bytes; decode before splitting on the NUL
        # separators produced by find -print0.
        sout = my_process.communicate()[0].decode()
        data[node] = sout.split('\x00')
        data[node] = utilities.group_iterable(
            [element for element in data[node] if element in filenames],
            group_size,
        )

    # Now produce an iterator which steps through the various chunks of
    # data to image, and image each chunk.
    data_iterator = utilities.izip_longest(*list(data.values()))
    for data_chunk in data_iterator:
        to_process = []
        for node_data in data_chunk:
            if node_data:
                to_process.extend(node_data)
        yield to_process

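# A hedged usage sketch for the group_files() generator above. The logger,
# parsed clusterdesc, data directory, and filename list are assumed to be in
# hand already, and run_jobs() is a hypothetical stand-in for whatever
# schedules a chunk of files.
for chunk in group_files(logger, clusterdesc, '/data/scratch', 4, filenames):
    # Each chunk contains at most 4 files per compute node, which bounds
    # the number of simultaneous processes on any one host.
    run_jobs(chunk)   # hypothetical scheduler
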
def plugin_main(args, **kwargs):
    """
    Takes in mapfiles and changes host names to allow for efficient MPI
    reduction.

    Parameters
    ----------
    mapfiles : list of str
        List of the names of the input mapfiles. WILL BE MODIFIED!
    mapfile_dir : str
        Name of the directory containing the mapfiles
    head_node_only : str
        Either 'True' or 'False': whether to use just the head node or not

    Returns
    -------
    result : empty dictionary
    """
    result = {}
    # Read in the list of mapfiles from a bracketed, comma-separated string.
    mapfiles = (kwargs['mapfiles'][1:-1]).split(',')
    mapfile_dir = kwargs['mapfile_dir']
    head_node_only = kwargs['head_node_only'] in ['True', 'true', 'T', 't', '1']

    fn_list = [os.path.join(mapfile_dir, mf) for mf in mapfiles]

    # Caution: reload the node iterable for every mapfile to ensure
    # corresponding entries have the same node set as host.
    for fn in fn_list:
        if head_node_only:
            # Just the head node. An iterator over a single node is
            # unnecessary, but keeps this code path identical to the
            # multi-node case below.
            cn_cycle = it.cycle(
                get_head_node(ClusterDesc(str(os.environ['cluster_desc_file'])))
            )
        else:
            # Cyclically iterate over the available compute nodes.
            cn_cycle = it.cycle(
                get_compute_nodes(ClusterDesc(str(os.environ['cluster_desc_file'])))
            )
        # Read the current data map file (probably with all host values set
        # to "localhost") and assign each entry a host in round-robin order.
        data = DataMap.load(fn)
        for value in DataMap.SkipIterator(data):
            value.host = next(cn_cycle)
        data.save(fn)   # overwrite the original file

    return result

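# An illustrative call of plugin_main() above, showing the expected string
# formats. The environment variable value and all file names are placeholder
# assumptions, not fixed by the source; note the bracketed mapfile list,
# which matches the [1:-1] strip inside the function.
import os

os.environ['cluster_desc_file'] = '/path/to/cluster.clusterdesc'   # hypothetical
plugin_main(
    None,
    mapfiles='[bands.mapfile,images.mapfile]',
    mapfile_dir='/path/to/mapfile_dir',
    head_node_only='False',
)
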