def path_exists(self, path):
    cmd = self.ssh_cmd + ['ls %s' % (path,)]
    logger.info('ltacp %s: checking if source exists. executing: %s' % (self.logId, ' '.join(cmd)))

    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    self.started_procs[proc] = cmd

    # block until ls is finished
    communicate_returning_strings(proc)
    del self.started_procs[proc]

    logger.info('ltacp %s: source %s %s' % (self.logId, path,
                                            'exists' if proc.returncode == 0 else 'does not exist'))
    return proc.returncode == 0
def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish.
    Return its return code.
    """
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(working_dir,
                        self.logger.name + ".GlobalControl",
                        os.path.basename(self.inputs['control_exec'])):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [self.inputs['control_exec'], bbs_parset, "0"],
                    self.logger,
                    cwd=working_dir,
                    env=self.environment)
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error("Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

        returncode = self._monitor_process(bbs_control_process, "BBS Control")
        sout, serr = communicate_returning_strings(bbs_control_process)
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode
def _run_bbs_kernel(self, host, command, *arguments):
    """
    Run command with arguments on the specified host using ssh.
    Return its return code.

    The resultant process is monitored for failure; see
    _monitor_process() for details.
    """
    try:
        bbs_kernel_process = run_remote_command(self.config,
                                                self.logger,
                                                host,
                                                command,
                                                self.environment,
                                                arguments=arguments)
    except OSError:
        self.logger.exception("BBS Kernel failed to start")
        self.killswitch.set()
        return 1

    result = self._monitor_process(bbs_kernel_process, "BBS Kernel on %s" % host)
    sout, serr = communicate_returning_strings(bbs_kernel_process)
    serr = serr.replace("Connection to %s closed.\r\n" % host, "")
    log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
    return result
def get_stations_rcu_mode(stations=None):
    '''
    Get the current rcu mode of one or more stations.
    :param stations - string or list of strings: 1 or more station names, or lcu hostnames
    :return: dict with station / rcu-mode-integer pairs
    '''
    if stations is None:
        stations = get_current_stations(as_host_names=True)
    elif isinstance(stations, str):
        stations = [stations]

    procs = {}
    for station in stations:
        cmd = ["rspctl", "--rcu | grep ON | awk '{ print $4 }' | grep mode | cut -c 6-6 | sort -u | head -n 1"]
        cmd = wrap_command_in_lcu_station_ssh_call(cmd, station, via_head=True)

        logger.debug('executing cmd: %s', ' '.join(cmd))
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
        procs[station] = proc

    result = {}
    for station, proc in list(procs.items()):
        out, err = communicate_returning_strings(proc)
        if proc.returncode != 0:
            logger.warning("Could not determine rcu mode for station %s. stderr=%s" % (station, err))

        try:
            rcu_mode = int(out.strip())
            logger.debug('station %s is in rcumode=%s', station, rcu_mode)
            result[station] = rcu_mode
        except ValueError:
            logger.warning("Could not determine rcu mode for station %s. stderr=%s" % (station, err))

    return result
def getSubDirectories(self, path):
    logger.debug('getSubDirectories(%s)', path)

    # get the subdirectories of the given path
    cmd = ['find', path.rstrip('/'), '-maxdepth', '1', '-type', 'd']
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.debug(' '.join(cmd))

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0:
        # lfs puts its error message in stdout
        logger.error(out + err)
        return {'found': False, 'path': path, 'message': out + err}

    # parse out, clean lines, and skip the first line, which is the path itself.
    lines = [l.strip() for l in out.split('\n')][1:]
    subdir_names = [l.split('/')[-1].strip().strip('/') for l in lines if l]

    result = {'found': True, 'path': path, 'sub_directories': subdir_names}
    logger.debug('getSubDirectories(%s) result: %s', path, result)
    return result
def getTaskLogHtml(task_id):
    task = radb().getTask(task_id)

    cmd = []
    if task['type'] == 'pipeline':
        cmd = ['ssh', '*****@*****.**',
               'cat /data/log/pipeline-%s-*.log' % task['otdb_id']]
    else:
        cmd = ['ssh', 'mcu001.control.lofar',
               'cat /opt/lofar/var/log/mcu001\\:ObservationControl\\[0\\]\\{%s\\}.log*' % task['otdb_id']]

    logger.info(' '.join(cmd))
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode == 0:
        return out, 200, {'Content-Type': 'text/plain; charset=utf-8'}
    else:
        return err, 500, {'Content-Type': 'text/plain; charset=utf-8'}
def _create_parmdb(self, parmdb_executable, target_dir_path):
    """
    _create_parmdb creates a parmdb at the target_dir_path using the
    supplied executable. Does not test for existence of the target parent dir.
    Returns 1 if the parmdb executable failed, 0 otherwise.
    """
    # Format the template string by inserting the target dir
    formatted_template = _TEMPLATE_PARMDB.format(target_dir_path)
    try:
        # Spawn a subprocess and connect the pipes
        parmdbm_process = subprocess.Popen(parmdb_executable,
                                           stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
        # Send the formatted template on stdin
        sout, serr = communicate_returning_strings(parmdbm_process,
                                                   input=formatted_template)
        # Log the output
        log_process_output("parmdbm", sout, serr, self.logger)
    except OSError as oserror:
        self.logger.error("Failed to spawn parmdbm: {0}".format(str(oserror)))
        return 1
    return 0
def get_current_stations(station_group='today', as_host_names=True):
    '''
    Wrapper function around the amazing lcurun and stations.txt operators system.
    Get a list of the currently used station names, either as hostnames,
    or as parset-like station names (default).
    :param station_group - string: one of the predefined operator station groups,
                           like 'today', 'today_nl', 'core', etc. Defaults to 'today',
                           which means all active stations.
    :param as_host_names - bool: return the station names as ssh-able hostnames
                           if True (like cs001c, cs002c), or as parset-like
                           VirtualInstrument.stationList names if False (like CS001, CS002).
    :return: the station names for the given station_group, in the form selected
             by as_host_names.
    '''
    cmd = ['cat', '/opt/operations/bin/stations.txt']
    cmd = wrap_command_in_lcu_head_node_ssh_call(cmd)

    logger.debug('executing cmd: %s', ' '.join(cmd))
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0:
        raise LCURuntimeError("Could not fetch stations.txt file. stderr=%s" % (err,))

    station_file_lines = out.splitlines(False)
    station_group_filter = station_group.strip() + ' '
    station_group_line = next(l for l in station_file_lines if l.startswith(station_group_filter))
    station_aliases = station_group_line.split(' ')[-1].split(',')

    station_hostnames = []
    for station_alias in station_aliases:
        # the alias mapping is at the top of the file,
        # so the first matching line holds the mapping
        station_alias_line = next(l for l in station_file_lines if station_alias in l)
        station_hostname = station_alias_line.split()[0].strip()
        station_hostnames.append(station_hostname)

    if as_host_names:
        logger.info("station hostnames in group '%s': %s", station_group, ' '.join(station_hostnames))
        return station_hostnames

    station_names = [hostname2stationname(x) for x in station_hostnames]
    logger.info("stations in group '%s': %s", station_group, ' '.join(station_names))
    return station_names
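# Hedged illustration of the stations.txt layout the parser above assumes;
# the file is operator-maintained and these lines are hypothetical. The code
# expects alias-to-hostname mapping lines near the top of the file (hostname
# as first token, alias elsewhere on the line), and group lines whose last
# whitespace-separated field is a comma-separated list of aliases:
#
#   cs001c ... CS001
#   cs002c ... CS002
#   ...
#   today ... CS001,CS002
#
# Example calls:
#
#   hostnames = get_current_stations('today', as_host_names=True)    # e.g. ['cs001c', 'cs002c']
#   names = get_current_stations('core', as_host_names=False)        # e.g. ['CS001', 'CS002']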
def GetSIP(self):
    try:
        if self.Type == "MoM":
            with self.momClient:
                self.SIP = self.momClient.uploadDataAndGetSIP(self.ArchiveId,
                                                              self.ticket,
                                                              self.FileName,
                                                              self.PrimaryUri,
                                                              self.FileSize,
                                                              self.MD5Checksum,
                                                              self.Adler32Checksum,
                                                              validate=True)
        elif 'SIPLocation' in self.job:
            # the job file might know where the sip is when it is not a MoM job
            try:
                sip_host = self.job['SIPLocation'].split(':')[0]
                sip_path = self.job['SIPLocation'].split(':')[1]
                cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
                logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' '.join(cmd)))
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = communicate_returning_strings(p)
                if p.returncode != 0:
                    raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))

                self.SIP = out
                self.SIP = addIngestInfoToSIP(self.SIP, self.ticket, self.FileSize, self.MD5Checksum, self.Adler32Checksum)
                self.SIP = self.SIP.replace('<stationType>Europe</stationType>', '<stationType>International</stationType>')

                # make sure the source in the SIP is the same as the type of the storage ticket
                self.SIP = re.compile('<source>eor</source>', re.IGNORECASE).sub('<source>%s</source>' % (self.Type,), self.SIP)

                if not validateSIPAgainstSchema(self.SIP):
                    logger.error('Invalid SIP:\n%s', self.SIP)
                    raise Exception('SIP for %s does not validate against schema' % self.JobId)
            except Exception:
                logger.exception('Getting SIP from SIPLocation %s failed', self.job['SIPLocation'])
                raise
            logger.info('SIP received for %s from SIPLocation %s with size %d (%s): \n%s' % (
                self.JobId, self.job['SIPLocation'], len(self.SIP),
                humanreadablesize(len(self.SIP)), self.SIP[0:1024]))
        else:
            self.SIP = makeSIP(self.Project, self.ObsId, self.ArchiveId, self.ticket,
                               self.FileName, self.FileSize, self.MD5Checksum,
                               self.Adler32Checksum, self.Type)
            self.FileType = FILE_TYPE_UNSPECIFIED
    except Exception as e:
        if self.minimal_SIP:
            logger.info('making minimal SIP for %s', self.JobId)
            self.SIP = makeSIP(self.Project, self.ObsId, self.ArchiveId, self.ticket,
                               self.FileName, self.FileSize, self.MD5Checksum,
                               self.Adler32Checksum, self.Type)
            logger.info('minimal SIP for %s: \n%s', self.JobId, self.SIP)
            self.FileType = FILE_TYPE_UNSPECIFIED
        else:
            raise
def disk_usage(*paths):
    """
    Return the disk usage in bytes by the file(s) in ``paths``.
    """
    cmd = ['du', '-s', '-b']
    proc = subprocess.Popen(cmd + list(paths), stdout=subprocess.PIPE)
    sout = communicate_returning_strings(proc)[0]
    if sout:
        return sum([int(s.split('\t')[0]) for s in sout.strip().split('\n')])
    else:
        return 0
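# Hedged usage sketch for disk_usage() above; the paths are hypothetical.
# 'du -s -b' prints one "<bytes>\t<path>" line per argument, which is why
# the function sums the first tab-separated column over all output lines:
#
#   total = disk_usage('/data/scratch/obs1.MS', '/data/scratch/obs2.MS')
#   print('%d bytes in use' % total)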
def _removeRemoteFifo(self):
    if hasattr(self, 'remote_data_fifo') and self.remote_data_fifo:
        '''remove a file (or fifo) on a remote host. Test if the file exists before deleting.'''
        cmd_remote_ls = self.ssh_cmd + ['ls %s' % (self.remote_data_fifo,)]
        p_remote_ls = Popen(cmd_remote_ls, stdout=PIPE, stderr=PIPE)
        communicate_returning_strings(p_remote_ls)

        if p_remote_ls.returncode == 0:
            cmd_remote_rm = self.ssh_cmd + ['rm %s' % (self.remote_data_fifo,)]
            logger.info('ltacp %s: removing remote fifo. executing: %s' % (self.logId, ' '.join(cmd_remote_rm)))
            p_remote_rm = Popen(cmd_remote_rm, stdout=PIPE, stderr=PIPE)
            out, err = communicate_returning_strings(p_remote_rm)
            if p_remote_rm.returncode != 0:
                logger.error("Could not remove remote fifo %s@%s:%s\n%s" % (
                    self.src_user, self.src_host, self.remote_data_fifo, err))
def pathExists(self, path):
    cmd = ['lfs', 'ls', path]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.debug(' '.join(cmd))

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0 and 'No such file or directory' in err:
        return False
    return True
def getDiskUsageForPath_du(path):
    logger.info("getDiskUsageForPath_du('%s')", path)

    result = {'found': False, 'path': path, 'disk_usage': None, 'name': path.split('/')[-1]}

    cmd = ['du', '-bcs', path]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.info(' '.join(cmd))

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode == 0:
        # example of out:
        # 7025510839  /data/projects/HOLOG_WINDMILL_TESTS/L662734/uv/
        # 7025510839  total

        # parse out
        lines = [l.strip() for l in out.split('\n')]
        total_lines = [l for l in lines if 'total' in l]
        if total_lines:
            parts = [p.strip() for p in total_lines[0].split()]
            if len(parts) == 2:
                result['found'] = True
                result['disk_usage'] = int(parts[0])
                result['nr_of_files'] = None
    else:
        result['message'] = out + err
        result['found'] = False

        if 'No such file or directory' in err:
            logger.warning('No such file or directory: %s', path)
            result['disk_usage'] = 0
        else:
            logger.error(out + err)

    result['disk_usage_readable'] = humanreadablesize(result['disk_usage'])

    otdb_id = getOTDBIdFromPath(path)
    if otdb_id:
        result['otdb_id'] = otdb_id

    logger.info("getDiskUsageForPath_du('%s') returning: %s", path, result)
    return result
def path_mounted(self, path):
    logger.info(os.path.normpath(path))
    logger.info(os.path.normpath(path).strip().split(os.path.sep))
    root_dir = os.path.sep + [dir for dir in os.path.normpath(path).strip().split(os.path.sep) if dir][0]
    cmd = self.ssh_cmd + ['mount | grep %s' % (root_dir,)]
    logger.info("ltacp %s: checking if '%s' of path '%s' is mounted. executing: %s" % (
        self.logId, root_dir, path, ' '.join(cmd)))

    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    self.started_procs[proc] = cmd

    # block until the mount check is finished
    communicate_returning_strings(proc)
    del self.started_procs[proc]

    logger.info("ltacp %s: '%s' of path '%s' %s" % (
        self.logId, root_dir, path,
        'is mounted' if proc.returncode == 0 else 'is not mounted'))
    return proc.returncode == 0
def can_logon_to_source_host(self):
    cmd_login_to_source_host = self.ssh_cmd + ['true']
    logger.info('ltacp %s: logging in to source host. executing: %s' % (
        self.logId, ' '.join(cmd_login_to_source_host)))

    proc = Popen(cmd_login_to_source_host, stdout=PIPE, stderr=PIPE)
    self.started_procs[proc] = cmd_login_to_source_host

    # block until the login attempt is finished
    out, err = communicate_returning_strings(proc)
    del self.started_procs[proc]

    if proc.returncode == 0:
        logger.info('ltacp %s: can login to %s@%s', self.logId, self.src_user, self.src_host)
        return True

    logger.error('ltacp %s: cannot login to %s@%s error: %s', self.logId, self.src_user, self.src_host, err)
    return False
def go(self):
    self.logger.info("Starting example recipe run")
    super(example, self).go()

    self.logger.info("This is a log message")

    my_process = subprocess.Popen([self.inputs['executable']],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    sout, serr = communicate_returning_strings(my_process)
    self.outputs['stdout'] = sout
    log_process_output(self.inputs['executable'], sout, serr, self.logger)

    if my_process.returncode == 0:
        return 0
    else:
        self.logger.warn("Return code (%d) is not 0." % my_process.returncode)
        return 1
def read_initscript(logger, filename, shell="/bin/sh"):
    """
    Return a dict of the environment after sourcing the given script in a shell.
    """
    if not os.path.exists(filename):
        logger.warn("Environment initialisation script not found!")
        return {}
    else:
        logger.debug("Reading environment from %s" % filename)
        p = subprocess.Popen(['. %s ; env' % (filename,)],
                             shell=True,
                             executable=shell,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        so, se = communicate_returning_strings(p)
        environment = [x.split('=', 1) for x in so.strip().split('\n')]
        environment = [x for x in environment if len(x) == 2]
        return dict(environment)
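# Hedged usage sketch for read_initscript() above; the script path is
# hypothetical. The trick is to source the script and run 'env' in the
# same shell, then parse the KEY=VALUE lines back into a dict, which can
# then be passed as env= to subprocess.Popen:
#
#   environment = read_initscript(logger, '/opt/lofar/lofarinit.sh')
#   subprocess.Popen(['some_lofar_tool'], env=environment)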
def CheckForValidSIP(self):
    if self.Type == "MoM":
        try:
            with self.momClient:
                self.momClient.getSIP(self.ArchiveId, validate=True, log_prefix=self.JobId)
        except Exception as e:
            logger.log(logging.WARNING if self.minimal_SIP else logging.ERROR,
                       'CheckForValidSIP: Getting SIP from MoM failed for %s: %s', self.JobId, e)
            if not self.minimal_SIP:
                raise
    elif 'SIPLocation' in self.job:
        # the job file might know where the sip is when it is not a MoM job
        try:
            sip_host = self.job['SIPLocation'].split(':')[0]
            sip_path = self.job['SIPLocation'].split(':')[1]
            cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
            logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' '.join(cmd)))
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = communicate_returning_strings(p)
            if p.returncode != 0:
                raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))

            tmp_SIP = out
            tmp_SIP = addIngestInfoToSIP(tmp_SIP, self.ticket, self.FileSize, self.MD5Checksum, self.Adler32Checksum)
            tmp_SIP = tmp_SIP.replace('<stationType>Europe</stationType>', '<stationType>International</stationType>')

            # make sure the source in the SIP is the same as the type of the storage ticket
            tmp_SIP = re.compile('<source>eor</source>', re.IGNORECASE).sub('<source>%s</source>' % (self.Type,), tmp_SIP)

            if not validateSIPAgainstSchema(tmp_SIP):
                logger.error('CheckForValidSIP: Invalid SIP:\n%s', tmp_SIP)
                raise Exception('SIP for %s does not validate against schema' % self.JobId)
        except Exception:
            logger.exception('CheckForValidSIP: Getting SIP from SIPLocation %s failed', self.job['SIPLocation'])
            raise

    logger.info('SIP for %s is valid, can proceed with transfer' % (self.JobId,))
def done(self):
    if self.completed:
        return True

    if self.output_streams:
        return False

    # Process is finished, read remaining data and exit code
    (stdout, stderr) = communicate_returning_strings(self.process)
    self.exit_status = self.process.returncode
    self._addoutput(self.STDOUT, stdout, flush=True)
    self._addoutput(self.STDERR, stderr, flush=True)
    self.completed = True

    self.logger("Subprocess completed with exit status %d: %s" % (self.exit_status, " ".join(self.cmd)))
    return True
def createNetCatCmd(listener, user=None, host=None):
    '''helper method to determine the proper call syntax for netcat on host'''

    # nc has no version option or other way to check its version,
    # so just try the variants and pick the first one that does not fail
    if listener:
        nc_variants = ['nc --recv-only', 'nc']
    else:
        nc_variants = ['nc -q 0', 'nc --send-only', 'nc']

    for nc_variant in nc_variants:
        cmd = nc_variant.split(' ')
        if user and host:
            cmd = ['ssh', '-n', '-x', '%s@%s' % (user, host)] + cmd
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = communicate_returning_strings(p)
        if 'invalid option' not in err:
            return nc_variant

    raise LtacpException('could not determine remote netcat version')
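# Hedged usage sketch for createNetCatCmd() above; the user/host/port values
# are hypothetical. Probe once for the local and the remote netcat flavour,
# then build the actual transfer commands from the returned variant string:
#
#   local_nc = createNetCatCmd(listener=True)                            # e.g. 'nc --recv-only'
#   remote_nc = createNetCatCmd(listener=False, user='lofar', host='node01')  # e.g. 'nc -q 0'
#   listen_cmd = local_nc.split(' ') + ['-l', '55432']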
def getDiskFreeSpaceForMountpoint(mountpoint=CEP4_DATA_MOUNTPOINT):
    logger.info("getDiskFreeSpaceForMountpoint('%s')", mountpoint)

    result = {'found': False, 'mountpoint': mountpoint}

    cmd = ['df', mountpoint]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.info(' '.join(cmd) + ' ...waiting for result...')

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0:
        logger.error(out + err)
        result['message'] = out
        return result

    # example of out:
    # Filesystem                                     1K-blocks          Used     Available Use% Mounted on
    # 10.134.233.65@o2ib:10.134.233.66@o2ib:/cep4-fs 3369564904320 1460036416928 1737591103048  46% /data

    # parse out
    lines = [l.strip() for l in out.split('\n')]
    data_line = next((l for l in lines if mountpoint in l), None)
    if data_line:
        parts = [p.strip() for p in data_line.split(' ') if p.strip()]
        result['found'] = True
        result['disk_size'] = 1024 * int(parts[1])
        result['disk_usage'] = 1024 * int(parts[2])
        result['disk_free'] = 1024 * int(parts[3])

        result['disk_size_readable'] = humanreadablesize(result['disk_size'])
        result['disk_usage_readable'] = humanreadablesize(result['disk_usage'])
        result['disk_free_readable'] = humanreadablesize(result['disk_free'])

    logger.info("getDiskFreeSpaceForMountpoint('%s') returning: %s", mountpoint, result)
    return result
def catch_segfaults(cmd, cwd, env, logger, max=1, cleanup=lambda: None, usageStats=None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")

        logger.debug("Running: %s" % (' '.join(cmd),))
        process = spawn_process(cmd, logger, cwd, env)

        # add the created process to the usageStats object
        if usageStats:
            usageStats.addPID(process.pid)

        if 'casa' in cmd[0]:
            while process.returncode is None:
                process.poll()
                time.sleep(1)

        sout, serr = communicate_returning_strings(process)
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])

    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
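# Hedged usage sketch for catch_segfaults() above, mirroring the image2fits
# call used elsewhere in this codebase (the img/fits variables are
# hypothetical). A segfault (returncode -11) triggers cleanup() and a retry;
# any other non-zero exit code raises CalledProcessError immediately:
#
#   process = catch_segfaults(['image2fits', '-in', img, '-out', fits],
#                             working_dir, self.environment, self.logger,
#                             max=2, cleanup=lambda: os.unlink(fits))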
def _ncListen(self, log_name):
    # pick an initial random port for the data receiver
    port = str(random.randint(49152, 65535))
    while True:
        # start listening for the data stream
        cmd_listen = self.localNetCatCmd.split(' ') + ['-l', port]

        logger.info('ltacp %s: listening for %s. executing: %s' % (self.logId, log_name, ' '.join(cmd_listen)))
        p_listen = Popen(cmd_listen, stdout=PIPE, stderr=PIPE)

        time.sleep(0.5)
        if p_listen.poll() is not None:
            # nc returned prematurely, pick another port to listen on
            o, e = communicate_returning_strings(p_listen)
            logger.info('ltacp %s: nc returned prematurely: %s' % (self.logId, e.strip()))
            port = str(random.randint(49152, 65535))
        else:
            self.started_procs[p_listen] = cmd_listen
            return (p_listen, port)
def group_files(logger, clusterdesc, node_directory, group_size, filenames):
    """
    Group a list of files into blocks suitable for simultaneous
    processing, such that a limited number of processes run on any given
    host at a time.

    All node_directory on all compute nodes specified in clusterdesc is
    searched for any of the files listed in filenames. A generator is
    produced; on each call, no more than group_size files per node
    are returned.
    """
    # Given a limited number of processes per node, the first task is to
    # partition up the data for processing.
    logger.debug('Listing data on nodes')
    data = {}
    for node in get_compute_nodes(clusterdesc):
        logger.debug("Node: %s" % (node))
        exec_string = ["ssh", node, "--", "find", node_directory, "-maxdepth 1", "-print0"]
        logger.debug("Executing: %s" % (" ".join(exec_string)))
        my_process = subprocess.Popen(exec_string,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
        sout, _ = communicate_returning_strings(my_process)
        data[node] = sout.split('\x00')
        data[node] = utilities.group_iterable(
            [element for element in data[node] if element in filenames],
            group_size,
        )

    # Now produce an iterator which steps through the various chunks of
    # data to image, and image each chunk
    data_iterator = utilities.izip_longest(*list(data.values()))
    for data_chunk in data_iterator:
        to_process = []
        for node_data in data_chunk:
            if node_data:
                to_process.extend(node_data)
        yield to_process
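# Hedged usage sketch for group_files() above; the arguments are
# hypothetical. Each yielded chunk contains at most group_size files per
# compute node, so iterating the generator caps the number of simultaneous
# per-node processes:
#
#   for chunk in group_files(logger, clusterdesc, '/data/scratch', 8, filenames):
#       dispatch_jobs(chunk)   # hypothetical per-chunk worker dispatch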
def is_soure_single_file(self):
    if isinstance(self.src_path, str):
        src_dirname = os.path.dirname(self.src_path)
        src_basename = os.path.basename(self.src_path)

        # get input filetype
        cmd_remote_filetype = self.ssh_cmd + ['stat -L -c %%F %s' % (os.path.join(src_dirname, src_basename),)]
        logger.info('ltacp %s: determining source type. executing: %s' % (self.logId, ' '.join(cmd_remote_filetype)))
        p_remote_filetype = Popen(cmd_remote_filetype, stdout=PIPE, stderr=PIPE)
        self.started_procs[p_remote_filetype] = cmd_remote_filetype

        # block until stat is finished
        output_remote_filetype = communicate_returning_strings(p_remote_filetype)
        del self.started_procs[p_remote_filetype]
        if p_remote_filetype.returncode != 0:
            raise LtacpException('ltacp %s: determining source type failed: \nstdout: %s\nstderr: %s' % (
                self.logId, output_remote_filetype[0], output_remote_filetype[1]))

        for line in output_remote_filetype[0].split('\n'):
            if 'regular file' in line.strip():
                logger.info('ltacp %s: remote path is a file' % (self.logId,))
                return True

        logger.info('ltacp %s: remote path is a directory' % (self.logId,))
        return False
    else:
        # self.src_path is a list of files/dirs, so it is not a single file
        logger.info('ltacp %s: remote path is a list of files/dirs' % self.logId)
        return False
def run(self):
    while not self.stopFlag.isSet():
        # *************************************
        # first add newly tracked pids to the active list,
        # in a lock to ensure correct functioning
        with self.lock:
            if self.pid_in:
                self.pid_tracked.extend(self.pid_in)
                # initiate the location to save stat information for each pid
                for pid in self.pid_in:
                    self.pid_stats[pid] = []
                self.pid_in = []

        # write the poller script to a temp file
        (temp_fd, temp_path) = tempfile.mkstemp()
        os.close(temp_fd)  # re-opened by path below
        with open(temp_path, "w") as temp_file:
            temp_file.write(poller_string)

        # now get stats for each tracked pid
        try:
            for pid in self.pid_tracked:
                pps = subprocess.Popen(["bash", temp_path, str(pid)],
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
                out, err = communicate_returning_strings(pps)
                parset_output = eval(out.rstrip())  # remove trailing whitespace before eval
                self.pid_stats[pid].append(parset_output)
        finally:
            os.remove(temp_path)
        time.sleep(self.poll_interval)
def __execute(cmd, log_prefix='', timeout=-1):
    """
    helper method, wrapper around subprocess.
    execute command and return (stdout, stderr, returncode) tuple
    :param cmd: a subprocess Popen cmd like list
    :param log_prefix: an optional prefix for all log lines (can be used
                       to provide a unique identifier to filter log lines in a file)
    :param timeout: optional timeout in seconds
    :return: (stdout, stderr, returncode) tuple
    """
    if log_prefix:
        if not isinstance(log_prefix, str):
            log_prefix = str(log_prefix)
        if log_prefix[-1] != ' ':
            log_prefix += ' '

    logger.info('%sexecuting: %s', log_prefix, ' '.join(cmd))
    p_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE)

    if timeout > 0:
        timeout = timedelta(seconds=timeout)
        logger.debug('%swaiting at most %s for command to finish...', log_prefix, timeout)
        start_wait = datetime.now()
        while datetime.now() - start_wait < timeout:
            if p_cmd.poll() is not None:
                break
            time.sleep(1)

        if p_cmd.poll() is None:
            raise SrmException('%s%s did not finish within %s.' % (log_prefix, cmd, timeout))

    stdout, stderr = communicate_returning_strings(p_cmd)
    if p_cmd.returncode != 0:
        logger.error('%s: cmd=%s stdout=%s stderr=%s', log_prefix, ' '.join(cmd), stdout, stderr)
    return stdout, stderr, p_cmd.returncode
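# Hedged usage sketch for __execute() above; the srm command and surl are
# hypothetical. With timeout > 0 the command is polled once per second and
# a SrmException is raised if it does not finish in time:
#
#   stdout, stderr, returncode = __execute(['srmls', surl],
#                                          log_prefix='ingest job 12345',
#                                          timeout=300)
#   if returncode != 0:
#       raise SrmException('srmls failed: %s' % stderr)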
def runCommand(cmdline, input=None):
    logger.info("runCommand starting: %s", cmdline)

    # Start command
    proc = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE if input else None,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            shell=True,
                            universal_newlines=True)

    # Feed input and wait for termination
    logger.debug("runCommand input: %s", input)
    stdout, _ = communicate_returning_strings(proc, input)
    logger.debug("runCommand output: %s", stdout)

    # Check exit status, bail on error
    if proc.returncode != 0:
        logger.warning("runCommand(%s) had exit status %s with output: %s",
                       cmdline, proc.returncode, stdout)
        raise subprocess.CalledProcessError(proc.returncode, cmdline)

    # Return output
    return stdout.strip()
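# Hedged usage sketch for runCommand() above; the command is hypothetical.
# The command line runs through a shell (shell=True), stderr is merged into
# stdout, and the optional input string is fed to stdin:
#
#   version = runCommand('git describe --tags')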
def _removePath(self, path, do_recurse=False):
    logger.info("Remove path: %s" % (path,))

    # do various sanity checks to prevent accidental deletes
    if not isinstance(path, str):
        message = "Provided path is not a string"
        logger.error(message)
        return {'deleted': False, 'message': message, 'path': path}

    if not path:
        message = "Empty path provided"
        logger.error(message)
        return {'deleted': False, 'message': message, 'path': path}

    if '*' in path or '?' in path:
        message = "Invalid path '%s': No wildcards allowed" % (path,)
        logger.error(message)
        return {'deleted': False, 'message': message, 'path': path}

    # remove any trailing slashes
    if len(path) > 1:
        path = path.rstrip('/')

    required_base_paths = [self.path_resolver.projects_path,
                           self.path_resolver.scratch_path,
                           self.path_resolver.share_path]

    if not any(path.startswith(base_path) for base_path in required_base_paths):
        message = "Invalid path '%s': Path does not start with any of the base paths: '%s'" % (
            path, ' '.join(required_base_paths))
        logger.error(message)
        return {'deleted': False, 'message': message, 'path': path}

    for base_path in required_base_paths:
        if path.startswith(base_path) and path[len(base_path):].count('/') == 0:
            message = "Invalid path '%s': Path should be a subdir of '%s'" % (path, base_path)
            logger.error(message)
            return {'deleted': False, 'message': message, 'path': path}

    if not self.path_resolver.pathExists(path):
        message = "Nothing to delete, path '%s' does not exist." % (path)
        logger.warn(message)
        return {'deleted': True, 'message': message, 'path': path}

    try:
        du_result = self._sqrpc.getDiskUsageForPath(path) if do_recurse else {}
    except RPCTimeoutException:
        du_result = {}

    if du_result.get('found'):
        logger.info("Attempting to delete %s in %s", du_result.get('disk_usage_readable', '?B'), path)
    else:
        logger.info("Attempting to delete %s", path)

    if do_recurse:
        # LustreFS on CEP4 likes many small deletes better than one large tree delete,
        # so recurse into the sub_directories,
        # and take a small sleep in between so other processes (like observation datawriters) can access LustreFS
        # (we've seen observation data loss when deleting large trees)
        subdirs_result = self.path_resolver.getSubDirectories(path)
        if subdirs_result.get('found') and subdirs_result.get('sub_directories'):
            sub_directories = subdirs_result['sub_directories']

            for subdir in sub_directories:
                subdir_path = os.path.join(path, subdir)
                self._removePath(subdir_path, do_recurse=False)  # recurse only one level deep
                time.sleep(0.01)
    else:
        self._sendNotification(subject='PathDeleting',
                               content={'path': path, 'size': du_result.get('disk_usage', 0)})

    cmd = ['rm', '-rf', path]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.info(' '.join(cmd))

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode == 0:
        message = "Deleted %s in '%s'" % (du_result.get('disk_usage_readable', '?B'), path)
        logger.info(message)

        if do_recurse:  # only the top-level (do_recurse=True) call sends the notification
            self._sendNotification(subject='PathDeleted',
                                   content={'deleted': True, 'path': path,
                                            'message': message,
                                            'size': du_result.get('disk_usage', 0)})

        return {'deleted': True, 'message': message, 'path': path,
                'size': du_result.get('disk_usage', 0)}

    if do_recurse:  # only the top-level (do_recurse=True) call sends the notification
        self._sendNotification(subject='PathDeleted',
                               content={'deleted': False, 'path': path,
                                        'message': 'Failed to delete (part of) %s' % path})

    logger.error(err)
    return {'deleted': False,
            'message': 'Failed to delete (part of) %s' % path,
            'path': path}
def run(self, awimager_output, ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param ms_per_image: The X (90) measurement sets scheduled to create the image
    :param sourcelist: list of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: largest/maximum baseline used for the image
    :param processed_ms_dir: The X (90) measurement sets actually used to create the image
    :param fillrootimagegroup_exec: Executable used to add image data to the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        ms_per_image_map = DataMap.load(ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)
        # TODO: BUG!! the meta data might contain files that were copied
        # but failed in imager_bbs
        processed_ms_paths = []
        for item in ms_per_image_map:
            path = item.file
            ms_file_name = os.path.split(path)[1]
            # if the ms is in the processed dir (additional check)
            if ms_file_name in file_list:
                # save the path
                processed_ms_paths.append(os.path.join(processed_ms_dir, ms_file_name))

        # add the information to the image
        try:
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                                  sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.warn("addImagingInfo threw exception:")
            self.logger.warn(error)
            # Catch raising of the 'already done' error: allows for
            # rerunning of the recipe
            if "addImagingInfo already done" in str(error):
                pass
            else:
                raise Exception(error)
            # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info("Saving image in HDF5 format to: {0}".format(output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error("Exception raised inside pyrap.images: {0}".format(str(error)))
            raise error

        # Convert to fits
        # create target location
        fits_output = output_image + ".fits"
        # To allow reruns a possible earlier version needs to be removed!
        # image2fits fails if not done!!
        if os.path.exists(fits_output):
            os.unlink(fits_output)

        try:
            temp_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            with CatchLog4CPlus(temp_dir,
                                self.logger.name + '.' + os.path.basename(awimager_output),
                                "image2fits") as logger:
                catch_segfaults(["image2fits", '-in', awimager_output, '-out', fits_output],
                                temp_dir, self.environment, logger)
        except Exception as excp:
            self.logger.error(str(excp))
            return 1
        finally:
            shutil.rmtree(temp_dir)

        # ****************************************************************
        # 3. Fill the HDF5 root group
        command = [fillrootimagegroup_exec, output_image]
        self.logger.info(" ".join(command))
        # Spawn a subprocess and connect the pipes
        proc = subprocess.Popen(command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = communicate_returning_strings(proc)

        exit_status = proc.returncode
        self.logger.info(stdoutdata)
        self.logger.info(stderrdata)

        # if the fill failed, log it
        if exit_status != 0:
            self.logger.error("Error using the fillRootImageGroup command. "
                              "See above lines. Exit status: {0}".format(exit_status))
            return 1

        # *****************************************************************
        # 4. Export the fits image to the msss server
        url = "http://tanelorn.astron.nl:8000/upload"
        try:
            self.logger.info("Starting upload of fits image data to server!")
            opener = urllib.request.build_opener(mph.MultipartPostHandler)
            filedata = {"file": open(fits_output, "rb")}
            opener.open(url, filedata, timeout=2)
        # HTTPError needs to be caught first.
        except urllib.error.HTTPError as httpe:
            self.logger.warn("HTTP status is: {0}".format(httpe.code))
            self.logger.warn("failed exporting fits image to server")
        except urllib.error.URLError as urle:
            self.logger.warn(str(urle.reason))
            self.logger.warn("failed exporting fits image to server")
        except Exception as exc:
            self.logger.warn(str(exc))
            self.logger.warn("failed exporting fits image to server")

        # *****************************************************************
        # 5. export the sourcelist to the msss server
        url = "http://tanelorn.astron.nl:8000/upload_srcs"
        try:
            # Copy file to output location
            new_sourcelist_path = output_image + ".sourcelist"
            if os.path.exists(new_sourcelist_path):
                os.unlink(new_sourcelist_path)
            shutil.copy(sourcelist, new_sourcelist_path)

            self.logger.info("Starting upload of sourcelist data to server!")
            opener = urllib.request.build_opener(mph.MultipartPostHandler)
            filedata = {"file": open(new_sourcelist_path, "rb")}
            opener.open(url, filedata, timeout=2)
        # HTTPError needs to be caught first.
        except urllib.error.HTTPError as httpe:
            self.logger.warn("HTTP status is: {0}".format(httpe.code))
            self.logger.warn("failed exporting sourcelist to server")
        except urllib.error.URLError as urle:
            self.logger.warn(str(urle.reason))
            self.logger.warn("failed exporting sourcelist image to server")
        except Exception as exc:
            self.logger.warn(str(exc))
            self.logger.warn("failed exporting sourcelist image to server")

    self.outputs["hdf5"] = "succes"
    self.outputs["image"] = output_image
    return 0