Example No. 1
    def path_exists(self, path):
        cmd = self.ssh_cmd + ['ls %s' % (path)]
        logger.info('ltacp %s: checking if source exists. executing: %s' %
                    (self.logId, ' '.join(cmd)))
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
        self.started_procs[proc] = cmd

        # block until ls is finished
        communicate_returning_strings(proc)
        del self.started_procs[proc]

        logger.info('ltacp %s: source %s %s' %
                    (self.logId, path,
                     'exists' if proc.returncode == 0 else 'does not exist'))
        return proc.returncode == 0
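
Every example on this page funnels subprocess output through communicate_returning_strings. The helper itself is not shown here; below is a minimal sketch of what such a wrapper presumably does (decode Popen.communicate() output to str). The UTF-8 choice and error handling are assumptions, not the actual LOFAR implementation.

def communicate_returning_strings(proc, input=None):
    # Hypothetical sketch: like Popen.communicate(), but always return str.
    if isinstance(input, str):
        input = input.encode('utf-8')  # assumed encoding
    out, err = proc.communicate(input=input)

    def _to_str(data):
        # decode bytes; leave str (e.g. from universal_newlines=True) untouched
        return data.decode('utf-8', errors='replace') if isinstance(data, bytes) else data

    return _to_str(out), _to_str(err)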
Example No. 2
    def _run_bbs_control(self, bbs_parset, run_flag):
        """
        Run BBS Global Control and wait for it to finish. Return its return
        code.
        """
        self.logger.info("Running BBS GlobalControl")
        working_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
        with CatchLog4CPlus(working_dir, self.logger.name + ".GlobalControl",
                            os.path.basename(self.inputs['control_exec'])):
            with utilities.log_time(self.logger):
                try:
                    bbs_control_process = utilities.spawn_process(
                        [self.inputs['control_exec'], bbs_parset, "0"],
                        self.logger,
                        cwd=working_dir,
                        env=self.environment)
                    # _monitor_process() needs a convenient kill() method.
                    bbs_control_process.kill = lambda: os.kill(
                        bbs_control_process.pid, signal.SIGKILL)
                except OSError as e:
                    self.logger.error("Failed to spawn BBS Control (%s)" %
                                      str(e))
                    self.killswitch.set()
                    return 1
                finally:
                    run_flag.set()

            returncode = self._monitor_process(bbs_control_process,
                                               "BBS Control")
            sout, serr = communicate_returning_strings(bbs_control_process)
        shutil.rmtree(working_dir)
        log_process_output(self.inputs['control_exec'], sout, serr,
                           self.logger)
        return returncode
Example No. 3
    def _run_bbs_kernel(self, host, command, *arguments):
        """
        Run command with arguments on the specified host using ssh. Return its
        return code.

        The resultant process is monitored for failure; see
        _monitor_process() for details.
        """
        try:
            bbs_kernel_process = run_remote_command(self.config,
                                                    self.logger,
                                                    host,
                                                    command,
                                                    self.environment,
                                                    arguments=arguments)
        except OSError:
            self.logger.exception("BBS Kernel failed to start")
            self.killswitch.set()
            return 1
        result = self._monitor_process(bbs_kernel_process,
                                       "BBS Kernel on %s" % host)
        sout, serr = communicate_returning_strings(bbs_kernel_process)
        serr = serr.replace("Connection to %s closed.\r\n" % host, "")
        log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
        return result
Example No. 4
def get_stations_rcu_mode(stations=None):
    '''
    Get the current rcu mode of one or more stations.
    :param stations - string or list of strings: 1 or more station names, or lcu hostnames
    :return: dict mapping station name to rcu mode (integer)
    '''

    if stations is None:
        stations = get_current_stations(as_host_names=True)
    elif isinstance(stations, str):
        stations = [stations]

    procs = {}
    for station in stations:
        cmd = ["rspctl", "--rcu | grep ON | awk '{ print $4 }' | grep mode | cut -c 6-6 | sort -u | head -n 1"]
        cmd = wrap_command_in_lcu_station_ssh_call(cmd, station, via_head=True)
        logger.debug('executing cmd: %s', ' '.join(cmd))
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
        procs[station] = proc

    result = {}
    for station, proc in list(procs.items()):
        out, err = communicate_returning_strings(proc)

        if proc.returncode != 0:
            logger.warning("Could not determine rcu mode for station %s. sdterr=%s" % (station, err))

        try:
            rcu_mode = int(out.strip())
            logger.debug('station %s is in rcumode=%s', station, rcu_mode)
            result[station] = rcu_mode
        except ValueError:
            logger.warning("Could not determine rcu mode for station %s. sdterr=%s" % (station, err))

    return result
Example No. 5
    def getSubDirectories(self, path):
        logger.debug('getSubDirectories(%s)', path)
        # get the subdirectories of the given path
        cmd = ['find', path.rstrip('/'), '-maxdepth', '1', '-type', 'd']
        cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
        logger.debug(' '.join(cmd))
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = communicate_returning_strings(proc)

        if proc.returncode != 0:
            # lfs puts its error message in stdout
            logger.error(out + err)
            return {'found': False, 'path': path, 'message': out + err}

        # parse out, clean lines and skip first line which is path itself.
        lines = [l.strip() for l in out.split('\n')][1:]
        subdir_names = [
            l.split('/')[-1].strip().strip('/') for l in lines if l
        ]

        result = {'found': True, 'path': path, 'sub_directories': subdir_names}
        logger.debug('getSubDirectories(%s) result: %s', path, result)
        return result
Example No. 6
def getTaskLogHtml(task_id):
    task = radb().getTask(task_id)

    cmd = []
    if task['type'] == 'pipeline':
        cmd = [
            'ssh', '*****@*****.**',
            'cat /data/log/pipeline-%s-*.log' % task['otdb_id']
        ]
    else:
        cmd = [
            'ssh', 'mcu001.control.lofar',
            'cat /opt/lofar/var/log/mcu001\\:ObservationControl\\[0\\]\\{%s\\}.log*'
            % task['otdb_id']
        ]

    logger.info(' '.join(cmd))

    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)
    if proc.returncode == 0:
        return out, 200, {'Content-Type': 'text/plain; charset=utf-8'}
    else:
        return err, 500, {'Content-Type': 'text/plain; charset=utf-8'}
Example No. 7
    def _create_parmdb(self, parmdb_executable, target_dir_path):
        """
        _create_parmdb creates a parmdb at target_dir_path using the
        supplied executable. Does not test for existence of the target parent dir.
        Returns 1 if parmdbm failed, 0 otherwise.
        """
        # Format the template string by inserting the target dir
        formatted_template = _TEMPLATE_PARMDB.format(target_dir_path)
        try:
            # Spawn a subprocess and connect the pipes
            parmdbm_process = subprocess.Popen(parmdb_executable,
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            # Send formatted template on stdin
            sout, serr = communicate_returning_strings(
                parmdbm_process, input=formatted_template)

            # Log the output
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as oserror:
            self.logger.error("Failed to spawn parmdbm: {0}".format(
                str(oserror)))
            return 1

        return 0
Example No. 8
def get_current_stations(station_group='today', as_host_names=True):
    '''
    Wrapper function around the amazing lcurun and stations.txt operators' system.
    Get a list of the currently used station names, either as hostname, or as parset-like station name (default)
    :param station_group - string: one of the predefined operator station groups, like: 'today', 'today_nl', 'core', etc. Defaults to 'today' which means all active stations.
    :param as_host_names - bool: return the station names as ssh-able hostnames if True (like cs001c, cs002c). return the station names as parset-like VirtualInstrument.stationList names if False (like CS001, CS002).
    :return: the station names for the given station_group as ssh-able hostnames if as_host_names=True (like cs001c, cs002c) or as parset-like VirtualInstrument.stationList names if as_host_names=False (like CS001, CS002).
    '''
    cmd = ['cat', '/opt/operations/bin/stations.txt']
    cmd = wrap_command_in_lcu_head_node_ssh_call(cmd)
    logger.debug('executing cmd: %s', ' '.join(cmd))
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0:
        raise LCURuntimeError("Could not fetch stations.txt file. stderr=%s" % (err, ))

    station_file_lines = out.splitlines(False)
    station_group_filter = station_group.strip()+' '
    station_group_line = next(l for l in station_file_lines if l.startswith(station_group_filter))
    station_aliases = station_group_line.split(' ')[-1].split(',')
    station_hostnames = []
    for station_alias in station_aliases:
        # the alias mapping is at the top of the file, so the first matching line holds the mapping
        station_alias_line = next(l for l in station_file_lines if station_alias in l)
        station_hostname = station_alias_line.split()[0].strip()
        station_hostnames.append(station_hostname)

    if as_host_names:
        logger.info("station hostnames in group '%s': %s", station_group, ' '.join(station_hostnames))
        return station_hostnames

    station_names = [hostname2stationname(x) for x in station_hostnames]
    logger.info("stations in group '%s': %s", station_group, ' '.join(station_names))
    return station_names
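
A hedged usage sketch, with the group name taken from the docstring above:

# hostnames of all active stations, e.g. ['cs001c', 'cs002c', ...]
lcu_hostnames = get_current_stations('today', as_host_names=True)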
Example No. 9
    def GetSIP(self):
        try:
            if self.Type == "MoM":
                with self.momClient:
                    self.SIP = self.momClient.uploadDataAndGetSIP(self.ArchiveId,
                                                                self.ticket,
                                                                self.FileName,
                                                                self.PrimaryUri,
                                                                self.FileSize,
                                                                self.MD5Checksum,
                                                                self.Adler32Checksum,
                                                                validate=True)
            elif 'SIPLocation' in self.job: # job file might know where the sip is when it is not a MoM job
                try:
                    sip_host = self.job['SIPLocation'].split(':')[0]
                    sip_path = self.job['SIPLocation'].split(':')[1]

                    cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
                    logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' ' .join(cmd)))
                    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    out, err = communicate_returning_strings(p)
                    if p.returncode != 0:
                        raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))

                    self.SIP = out

                    self.SIP = addIngestInfoToSIP(self.SIP, self.ticket, self.FileSize, self.MD5Checksum, self.Adler32Checksum)

                    self.SIP = self.SIP.replace('<stationType>Europe</stationType>','<stationType>International</stationType>')

                    #make sure the source in the SIP is the same as the type of the storageticket
                    self.SIP = re.compile('<source>eor</source>', re.IGNORECASE).sub('<source>%s</source>' % (self.Type,), self.SIP)

                    if not validateSIPAgainstSchema(self.SIP):
                        logger.error('Invalid SIP:\n%s', self.SIP)
                        raise Exception('SIP for %s does not validate against schema' % self.JobId)

                except Exception:
                    logger.exception('Getting SIP from SIPLocation %s failed', self.job['SIPLocation'])
                    raise

                logger.info('SIP received for %s from SIPLocation %s with size %d (%s): \n%s' % (self.JobId,
                                                                                                self.job['SIPLocation'],
                                                                                                len(self.SIP),
                                                                                                humanreadablesize(len(self.SIP)),
                                                                                                self.SIP[0:1024]))
            else:
                self.SIP = makeSIP(self.Project, self.ObsId, self.ArchiveId, self.ticket, self.FileName, self.FileSize, self.MD5Checksum, self.Adler32Checksum, self.Type)
                self.FileType = FILE_TYPE_UNSPECIFIED
        except Exception as e:
            if self.minimal_SIP:
                logger.info('making minimal SIP for %s', self.JobId)
                self.SIP = makeSIP(self.Project, self.ObsId, self.ArchiveId, self.ticket, self.FileName, self.FileSize, self.MD5Checksum, self.Adler32Checksum, self.Type)
                logger.info('minimal SIP for %s: \n%s', self.JobId, self.SIP)
                self.FileType = FILE_TYPE_UNSPECIFIED
            else:
                raise
Example No. 10
def disk_usage(*paths):
    """
    Return the disk usage in bytes by the file(s) in ``paths``.
    """
    cmd = ['du', '-s', '-b']
    proc = subprocess.Popen(cmd + list(paths), stdout=subprocess.PIPE)
    sout = communicate_returning_strings(proc)[0]
    if sout:
        return sum([int(s.split('\t')[0]) for s in sout.strip().split('\n')])
    else:
        return 0
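
A hedged usage sketch (the paths are hypothetical). du -s -b prints one summary line per argument, so the returned sum covers all given paths:

total_bytes = disk_usage('/data/projects/foo', '/data/projects/bar')  # hypothetical paths
print(total_bytes)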
Example No. 11
    def _removeRemoteFifo(self):
        '''remove a file (or fifo) on a remote host. Test if file exists before deleting.'''
        if hasattr(self, 'remote_data_fifo') and self.remote_data_fifo:
            cmd_remote_ls = self.ssh_cmd + [
                'ls %s' % (self.remote_data_fifo, )
            ]
            p_remote_ls = Popen(cmd_remote_ls, stdout=PIPE, stderr=PIPE)
            communicate_returning_strings(p_remote_ls)

            if p_remote_ls.returncode == 0:
                cmd_remote_rm = self.ssh_cmd + [
                    'rm %s' % (self.remote_data_fifo, )
                ]
                logger.info('ltacp %s: removing remote fifo. executing: %s' %
                            (self.logId, ' '.join(cmd_remote_rm)))
                p_remote_rm = Popen(cmd_remote_rm, stdout=PIPE, stderr=PIPE)
                out, err = communicate_returning_strings(p_remote_rm)
                if p_remote_rm.returncode != 0:
                    logger.error("Could not remove remote fifo %s@%s:%s\n%s" %
                                 (self.src_user, self.src_host,
                                  self.remote_data_fifo, err))
Example No. 12
    def pathExists(self, path):
        cmd = ['lfs', 'ls', path]
        cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
        logger.debug(' '.join(cmd))
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = communicate_returning_strings(proc)

        if proc.returncode != 0 and 'No such file or directory' in err:
            return False

        return True
Example No. 13
def getDiskUsageForPath_du(path):
    logger.info('getDiskUsageForPath_du(\'%s\')', path)

    result = {
        'found': False,
        'path': path,
        'disk_usage': None,
        'name': path.split('/')[-1]
    }

    cmd = ['du', '-bcs', path]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.info(' '.join(cmd))

    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode == 0:
        # example of out
        # 7025510839      /data/projects/HOLOG_WINDMILL_TESTS/L662734/uv/
        # 7025510839      total

        #parse out
        lines = [l.strip() for l in out.split('\n')]
        total_lines = [l for l in lines if 'total' in l]
        if total_lines:
            parts = [p.strip() for p in total_lines[0].split()]
            if len(parts) == 2:
                result['found'] = True
                result['disk_usage'] = int(parts[0])
                result['nr_of_files'] = None
    else:
        result['message'] = out + err
        result['found'] = False

        if 'No such file or directory' in err:
            logger.warning('No such file or directory: %s', path)
            result['disk_usage'] = 0
        else:
            logger.error(out + err)

    result['disk_usage_readable'] = humanreadablesize(result['disk_usage'])

    otdb_id = getOTDBIdFromPath(path)
    if otdb_id:
        result['otdb_id'] = otdb_id

    logger.info('getDiskUsageForPath_du(\'%s\') returning: %s', path, result)
    return result
Example No. 14
    def path_mounted(self, path):
        logger.info(os.path.normpath(path))
        logger.info(os.path.normpath(path).strip().split(os.path.sep))
        root_dir = os.path.sep + [
            dir
            for dir in os.path.normpath(path).strip().split(os.path.sep) if dir
        ][0]
        cmd = self.ssh_cmd + ['mount | grep %s' % (root_dir)]
        logger.info(
            "ltacp %s: checking if '%s' of path '%s' is mounted. executing: %s"
            % (self.logId, root_dir, path, ' '.join(cmd)))
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
        self.started_procs[proc] = cmd

        # block until the mount|grep command is finished
        communicate_returning_strings(proc)
        del self.started_procs[proc]

        logger.info(
            "ltacp %s: '%s' of path '%s' %s" %
            (self.logId, root_dir, path,
             'is mounted' if proc.returncode == 0 else 'is not mounted'))
        return proc.returncode == 0
Example No. 15
    def can_logon_to_source_host(self):
        cmd_login_to_source_host = self.ssh_cmd + ['true']
        logger.info('ltacp %s: logging in to source host. executing: %s' %
                    (self.logId, ' '.join(cmd_login_to_source_host)))
        proc = Popen(cmd_login_to_source_host, stdout=PIPE, stderr=PIPE)
        self.started_procs[proc] = cmd_login_to_source_host

        # block until the login attempt is finished
        out, err = communicate_returning_strings(proc)
        del self.started_procs[proc]

        if proc.returncode == 0:
            logger.info('ltacp %s: can login to %s@%s', self.logId,
                        self.src_user, self.src_host)
            return True

        logger.error('ltacp %s: cannot login to %s@%s error: %s', self.logId,
                     self.src_user, self.src_host, err)
        return False
Example No. 16
    def go(self):
        self.logger.info("Starting example recipe run")
        super(example, self).go()

        self.logger.info("This is a log message")

        my_process = subprocess.Popen([self.inputs['executable']],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
        sout, serr = communicate_returning_strings(my_process)
        self.outputs['stdout'] = sout
        log_process_output(self.inputs['executable'], sout, serr, self.logger)

        if my_process.returncode == 0:
            return 0
        else:
            self.logger.warn("Return code (%d) is not 0." %
                             my_process.returncode)
            return 1
Example No. 17
def read_initscript(logger, filename, shell="/bin/sh"):
    """
    Return a dict of the environment after sourcing the given script in a shell.
    """
    if not os.path.exists(filename):
        logger.warn("Environment initialisation script not found!")
        return {}
    else:
        logger.debug("Reading environment from %s" % filename)
        p = subprocess.Popen(['. %s ; env' % (filename)],
                             shell=True,
                             executable=shell,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        so, se = communicate_returning_strings(p)
        environment = [x.split('=', 1) for x in so.strip().split('\n')]
        environment = [x for x in environment if len(x) == 2]
        return dict(environment)
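
A hedged usage sketch (script path and tool name are hypothetical): the returned dict is meant to be passed as the child environment of a later subprocess call:

import logging
import subprocess

logger = logging.getLogger(__name__)
env = read_initscript(logger, '/opt/lofar/lofarinit.sh')  # hypothetical init script
proc = subprocess.Popen(['my_tool'], env=env, stdout=subprocess.PIPE)  # hypothetical tool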
Example No. 18
    def CheckForValidSIP(self):
        if self.Type == "MoM":
            try:
                with self.momClient:
                    self.momClient.getSIP(self.ArchiveId, validate=True, log_prefix=self.JobId)

            except Exception as e:
                logger.log(logging.WARNING if self.minimal_SIP else logging.ERROR,
                           'CheckForValidSIP: Getting SIP from MoM failed for %s: %s', self.JobId, e)
                if not self.minimal_SIP:
                    raise

        elif 'SIPLocation' in self.job: # job file might know where the sip is when it is not a MoM job
            try:
                sip_host = self.job['SIPLocation'].split(':')[0]
                sip_path = self.job['SIPLocation'].split(':')[1]

                cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
                logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' ' .join(cmd)))
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = communicate_returning_strings(p)
                if p.returncode != 0:
                    raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))

                tmp_SIP = out

                tmp_SIP = addIngestInfoToSIP(tmp_SIP, self.ticket, self.FileSize, self.MD5Checksum, self.Adler32Checksum)

                tmp_SIP = tmp_SIP.replace('<stationType>Europe</stationType>','<stationType>International</stationType>')

                #make sure the source in the SIP is the same as the type of the storageticket
                tmp_SIP = re.compile('<source>eor</source>', re.IGNORECASE).sub('<source>%s</source>' % (self.Type,), tmp_SIP)

                if not validateSIPAgainstSchema(tmp_SIP):
                    logger.error('CheckForValidSIP: Invalid SIP:\n%s', tmp_SIP)
                    raise Exception('SIP for %s does not validate against schema' % self.JobId)

            except Exception:
                logger.exception('CheckForValidSIP: Getting SIP from SIPLocation %s failed', self.job['SIPLocation'])
                raise

        logger.info('SIP for %s is valid, can proceed with transfer' % (self.JobId,))
Example No. 19
    def done(self):
        if self.completed:
            return True

        if self.output_streams:
            return False

        # Process is finished, read remaining data and exit code
        (stdout, stderr) = communicate_returning_strings(self.process)
        self.exit_status = self.process.returncode

        self._addoutput(self.STDOUT, stdout, flush=True)
        self._addoutput(self.STDERR, stderr, flush=True)

        self.completed = True

        self.logger("Subprocess completed with exit status %d: %s" %
                    (self.exit_status, " ".join(self.cmd)))

        return True
Example No. 20
def createNetCatCmd(listener, user=None, host=None):
    '''helper method to determine the proper call syntax for netcat on host'''

    # nc has no version option or other way to check its version
    # so, just try the variants and pick the first one that does not fail
    if listener:
        nc_variants = ['nc --recv-only', 'nc']
    else:
        nc_variants = ['nc -q 0', 'nc --send-only', 'nc']

    for nc_variant in nc_variants:
        cmd = nc_variant.split(' ')
        if user and host:
            cmd = ['ssh', '-n', '-x', '%s@%s' % (user, host)] + cmd
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = communicate_returning_strings(p)
        if 'invalid option' not in err:
            return nc_variant

    raise LtacpException('could not determine remote netcat version')
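
The return value is a command prefix string rather than a full command; a hedged usage sketch (user, host and port are hypothetical; cf. _ncListen in Example No. 23 below):

nc_variant = createNetCatCmd(listener=True, user='lofar', host='cs001c')  # hypothetical user/host
cmd_listen = nc_variant.split(' ') + ['-l', '54321']  # hypothetical port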
Example No. 21
def getDiskFreeSpaceForMountpoint(mountpoint=CEP4_DATA_MOUNTPOINT):
    logger.info('getDiskFreeSpaceForMountpoint(\'%s\')', mountpoint)

    result = {'found': False, 'mountpoint': mountpoint}

    cmd = ['df', mountpoint]
    cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
    logger.info(' '.join(cmd) + ' ...waiting for result...')

    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = communicate_returning_strings(proc)

    if proc.returncode != 0:
        logger.error(out + err)
        result['message'] = out
        return result

    # example of out
    # Filesystem                                         1K-blocks          Used     Available Use% Mounted on
    # 10.134.233.65@o2ib:10.134.233.66@o2ib:/cep4-fs 3369564904320 1460036416928 1737591103048  46% /data

    #parse out
    lines = [l.strip() for l in out.split('\n')]
    # use a default so a missing mountpoint leaves 'found': False instead of raising StopIteration
    data_line = next((l for l in lines if mountpoint in l), None)
    if data_line:
        # split on any whitespace; df pads its columns with multiple spaces
        parts = data_line.split()

        result['found'] = True
        result['disk_size'] = 1024 * int(parts[1])
        result['disk_usage'] = 1024 * int(parts[2])
        result['disk_free'] = 1024 * int(parts[3])

        result['disk_size_readable'] = humanreadablesize(result['disk_size'])
        result['disk_usage_readable'] = humanreadablesize(result['disk_usage'])
        result['disk_free_readable'] = humanreadablesize(result['disk_free'])

    logger.info('getDiskFreeSpaceForMountpoint(\'%s\') returning: %s',
                mountpoint, result)
    return result
Example No. 22
def catch_segfaults(cmd,
                    cwd,
                    env,
                    logger,
                    max=1,
                    cleanup=lambda: None,
                    usageStats=None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")
        logger.debug("Running: %s" % (' '.join(cmd), ))
        process = spawn_process(cmd, logger, cwd, env)
        #add the created process to the usageStat object
        if usageStats:
            usageStats.addPID(process.pid)

        if 'casa' in cmd[0]:
            while process.returncode is None:
                process.poll()
                time.sleep(1)
        sout, serr = communicate_returning_strings(process)
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])
    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
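
A hedged usage sketch (command, working directory and logger are hypothetical): a segfault (returncode -11) triggers a retry, while any other non-zero exit raises CalledProcessError:

import logging
import os

logger = logging.getLogger(__name__)
process = catch_segfaults(['my_binary', '--flag'], '/tmp', dict(os.environ), logger, max=2)  # hypothetical command
print(process.returncode)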
Example No. 23
    def _ncListen(self, log_name):
        # pick initial random port for data receiver
        port = str(random.randint(49152, 65535))
        while True:
            # start listen for data stream
            cmd_listen = self.localNetCatCmd.split(' ') + ['-l', port]

            logger.info('ltacp %s: listening for %s. executing: %s' %
                        (self.logId, log_name, ' '.join(cmd_listen)))
            p_listen = Popen(cmd_listen, stdout=PIPE, stderr=PIPE)

            time.sleep(0.5)
            if p_listen.poll() is not None:
                # nc returned prematurely, pick another port to listen to
                o, e = communicate_returning_strings(p_listen)
                logger.info('ltacp %s: nc returned prematurely: %s' %
                            (self.logId, e.strip()))
                port = str(random.randint(49152, 65535))
            else:
                self.started_procs[p_listen] = cmd_listen
                return (p_listen, port)
Example No. 24
def group_files(logger, clusterdesc, node_directory, group_size, filenames):
    """
    Group a list of files into blocks suitable for simultaneous
    processing, such that a limited number of processes run on any given
    host at a time.

    All node_directory on all compute nodes specified in clusterdesc is
    searched for any of the files listed in filenames. A generator is
    produced; on each call, no more than group_size files per node
    are returned.
    """
    # Given a limited number of processes per node, the first task is to
    # partition up the data for processing.
    logger.debug('Listing data on nodes')
    data = {}
    for node in get_compute_nodes(clusterdesc):
        logger.debug("Node: %s" % (node))
        exec_string = [
            "ssh", node, "--", "find", node_directory, "-maxdepth 1", "-print0"
        ]
        logger.debug("Executing: %s" % (" ".join(exec_string)))
        my_process = subprocess.Popen(exec_string,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
        sout, _ = communicate_returning_strings(my_process)
        data[node] = sout.split('\x00')
        data[node] = utilities.group_iterable(
            [element for element in data[node] if element in filenames],
            group_size,
        )

    # Now produce an iterator which steps through the various chunks of
    # data to image, and image each chunk
    data_iterator = utilities.izip_longest(*list(data.values()))
    for data_chunk in data_iterator:
        to_process = []
        for node_data in data_chunk:
            if node_data:
                to_process.extend(node_data)
        yield to_process
Example No. 25
    def is_soure_single_file(self):
        if isinstance(self.src_path, str):
            src_dirname = os.path.dirname(self.src_path)
            src_basename = os.path.basename(self.src_path)

            # get input filetype
            cmd_remote_filetype = self.ssh_cmd + [
                'stat -L -c %%F %s' %
                (os.path.join(src_dirname, src_basename), )
            ]
            logger.info('ltacp %s: determining source type. executing: %s' %
                        (self.logId, ' '.join(cmd_remote_filetype)))
            p_remote_filetype = Popen(cmd_remote_filetype,
                                      stdout=PIPE,
                                      stderr=PIPE)
            self.started_procs[p_remote_filetype] = cmd_remote_filetype

            # block until stat is finished
            output_remote_filetype = communicate_returning_strings(
                p_remote_filetype)
            del self.started_procs[p_remote_filetype]
            if p_remote_filetype.returncode != 0:
                raise LtacpException(
                    'ltacp %s: determining source type failed: \nstdout: %s\nstderr: %s'
                    % (self.logId, output_remote_filetype[0],
                       output_remote_filetype[1]))

            for line in output_remote_filetype[0].split('\n'):
                if 'regular file' in line.strip():
                    logger.info('ltacp %s: remote path is a file' %
                                (self.logId, ))
                    return True

            logger.info('ltacp %s: remote path is a directory' % (self.logId))
            return False
        else:
            #self.src_path is a list of files/dirs, so it is not a single file
            logger.info('ltacp %s: remote path is a list of files/dirs' %
                        self.logId)
            return False
Example No. 26
    def run(self):
        while not self.stopFlag.isSet():
            # *************************************
            # first add newly-submitted pids to the tracked list,
            # under a lock to ensure consistent state
            self.lock.acquire()
            if self.pid_in:
                self.pid_tracked.extend(self.pid_in)

                # initialise the storage for saved stat information
                for pid in self.pid_in:
                    self.pid_stats[pid] = []

                self.pid_in = []

            self.lock.release()

            # mkstemp returns an open fd; wrap it instead of leaking it
            (temp_fd, temp_path) = tempfile.mkstemp()
            with os.fdopen(temp_fd, "w") as temp_file:
                temp_file.write(poller_string)

            # now get stats for each tracked pid
            try:
                for pid in self.pid_tracked:
                    pps = subprocess.Popen(
                        ["bash", temp_path, str(pid)],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    out, err = communicate_returning_strings(pps)

                    # strip trailing whitespace and eval the dict-like stats
                    # string printed by the poller script
                    parset_output = eval(out.rstrip())
                    self.pid_stats[pid].append(parset_output)
            finally:
                os.remove(temp_path)

            time.sleep(self.poll_interval)
Example No. 27
def __execute(cmd, log_prefix='', timeout=-1):
    """ helper method, wrapper around subprocess.
    execute command and return (stdout, stderr, returncode) tuple
    :param cmd: a subprocess Popen cmd like list
    :param log_prefix: an optional prefix for all log lines (can be used to provide a unique identifier to filter log lines in a file)
    :param timeout: optional timeout in seconds
    :return: (stdout, stderr, returncode) tuple
    """
    if log_prefix:
        if not isinstance(log_prefix, str):
            log_prefix = str(log_prefix)
        if log_prefix[-1] != ' ':
            log_prefix += ' '

    logger.info('%sexecuting: %s', log_prefix, ' '.join(cmd))
    p_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE)

    if timeout > 0:
        timeout = timedelta(seconds=timeout)
        logger.debug('%swaiting at most %s for command to finish...',
                     log_prefix, timeout)
        start_wait = datetime.now()
        while datetime.now() - start_wait < timeout:
            if p_cmd.poll() is not None:
                break
            time.sleep(1)

        if p_cmd.poll() is None:
            raise SrmException('%s%s did not finish within %s.' %
                               (log_prefix, cmd, timeout))

    stdout, stderr = communicate_returning_strings(p_cmd)

    if p_cmd.returncode != 0:
        logger.error('%scmd=%s stdout=%s stderr=%s', log_prefix,
                     ' '.join(cmd), stdout, stderr)

    return stdout, stderr, p_cmd.returncode
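
A hedged usage sketch (the srm command and prefix are hypothetical): exceeding the timeout raises SrmException, while a plain non-zero exit status is only logged and returned:

out, err, rc = __execute(['srmls', 'srm://example.org/some/path'], log_prefix='job42', timeout=300)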
Example No. 28
def runCommand(cmdline, input=None):
    logger.info("runCommand starting: %s", cmdline)

    # Start command
    proc = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE if input else None,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            shell=True,
                            universal_newlines=True)

    # Feed input and wait for termination
    logger.debug("runCommand input: %s", input)
    stdout, _ = communicate_returning_strings(proc, input)
    logger.debug("runCommand output: %s", stdout)

    # Check exit status, bail on error
    if proc.returncode != 0:
        logger.warning("runCommand(%s) had exit status %s with output: %s",
                       cmdline, proc.returncode, stdout)
        raise subprocess.CalledProcessError(proc.returncode, cmdline)

    # Return output
    return stdout.strip()
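
A hedged usage sketch (the pipeline is hypothetical): because shell=True and stderr is merged into stdout, a plain shell pipeline works, and a non-zero exit raises CalledProcessError:

free_kb = runCommand("df / | awk 'NR==2 { print $4 }'")  # hypothetical pipeline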
Example No. 29
    def _removePath(self, path, do_recurse=False):
        logger.info("Remove path: %s" % (path, ))

        # do various sanity checking to prevent accidental deletes
        if not isinstance(path, str):
            message = "Provided path is not a string"
            logger.error(message)
            return {'deleted': False, 'message': message, 'path': path}

        if not path:
            message = "Empty path provided"
            logger.error(message)
            return {'deleted': False, 'message': message, 'path': path}

        if '*' in path or '?' in path:
            message = "Invalid path '%s': No wildcards allowed" % (path, )
            logger.error(message)
            return {'deleted': False, 'message': message, 'path': path}

        # remove any trailing slashes
        if len(path) > 1:
            path = path.rstrip('/')

        required_base_paths = [
            self.path_resolver.projects_path, self.path_resolver.scratch_path,
            self.path_resolver.share_path
        ]

        if not any(
                path.startswith(base_path)
                for base_path in required_base_paths):
            message = "Invalid path '%s': Path does not start with any of the base paths: '%s'" % (
                path, ' '.join(required_base_paths))
            logger.error(message)
            return {'deleted': False, 'message': message, 'path': path}

        for base_path in required_base_paths:
            if path.startswith(base_path) and path[len(base_path):].count(
                    '/') == 0:
                message = "Invalid path '%s': Path should be a subdir of '%s'" % (
                    path, base_path)
                logger.error(message)
                return {'deleted': False, 'message': message, 'path': path}

        if not self.path_resolver.pathExists(path):
            message = "Nothing to delete, path '%s' does not exist." % (path)
            logger.warn(message)
            return {'deleted': True, 'message': message, 'path': path}

        try:
            du_result = self._sqrpc.getDiskUsageForPath(
                path) if do_recurse else {}
        except RPCTimeoutException:
            du_result = {}

        if du_result.get('found'):
            logger.info("Attempting to delete %s in %s",
                        du_result.get('disk_usage_readable', '?B'), path)
        else:
            logger.info("Attempting to delete %s", path)

        if do_recurse:
            # LustreFS on CEP4 likes many small deletes better than one large tree delete
            # so, recurse into the sub_directories,
            # and take a small sleep in between so other processes (like observation datawriters) can access LustreFS
            # (we've seen observation data loss when deleting large trees)
            subdirs_result = self.path_resolver.getSubDirectories(path)
            if subdirs_result.get('found') and subdirs_result.get(
                    'sub_directories'):
                sub_directories = subdirs_result['sub_directories']

                for subdir in sub_directories:
                    subdir_path = os.path.join(path, subdir)
                    self._removePath(
                        subdir_path,
                        do_recurse=False)  #recurse only one level deep
                    time.sleep(0.01)
        else:
            self._sendNotification(subject='PathDeleting',
                                   content={
                                       'path': path,
                                       'size': du_result.get('disk_usage', 0)
                                   })

        cmd = ['rm', '-rf', path]
        cmd = wrap_command_in_cep4_head_node_ssh_call_if_needed(cmd)
        logger.info(' '.join(cmd))
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = communicate_returning_strings(proc)

        if proc.returncode == 0:
            message = "Deleted %s in '%s'" % (du_result.get(
                'disk_usage_readable', '?B'), path)
            logger.info(message)

            if do_recurse:
                # only send the final PathDeleted notification from the top-level (recursing) call
                self._sendNotification(subject='PathDeleted',
                                       content={
                                           'deleted': True,
                                           'path': path,
                                           'message': message,
                                           'size':
                                           du_result.get('disk_usage', 0)
                                       })

            return {
                'deleted': True,
                'message': message,
                'path': path,
                'size': du_result.get('disk_usage', 0)
            }

        if do_recurse:
            # only send the final PathDeleted notification from the top-level (recursing) call
            self._sendNotification(subject='PathDeleted',
                                   content={
                                       'deleted':
                                       False,
                                       'path':
                                       path,
                                       'message':
                                       'Failed to delete (part of) %s' % path
                                   })

        logger.error(err)

        return {
            'deleted': False,
            'message': 'Failed to delete (part of) %s' % path,
            'path': path
        }
Example No. 30
    def run(self, awimager_output, ms_per_image, sourcelist, target,
            output_image, minbaseline, maxbaseline, processed_ms_dir,
            fillrootimagegroup_exec, environment, sourcedb):
        """
        :param awimager_output: Path to the casa image produced by awimager
        :param ms_per_image: The X (90) measurement sets scheduled to
            create the image
        :param sourcelist: list of sources found in the image
        :param target: <unused>
        :param minbaseline: Minimum baseline used for the image
        :param maxbaseline: largest/maximum baseline used for the image
        :param processed_ms_dir: The X (90) measurement sets actually used to
            create the image
        :param fillrootimagegroup_exec: Executable used to add image data to
            the hdf5 image

        :rtype: self.outputs['hdf5'] set to "succes" to signal node success
        :rtype: self.outputs['image'] path to the produced hdf5 image
        """
        self.environment.update(environment)
        with log_time(self.logger):
            ms_per_image_map = DataMap.load(ms_per_image)

            # *****************************************************************
            # 1. add image info
            # Get all the files in the processed measurement dir
            file_list = os.listdir(processed_ms_dir)
            # TODO: BUG!! the meta data might contain files that were copied
            # but failed in imager_bbs
            processed_ms_paths = []
            for item in ms_per_image_map:
                path = item.file
                ms_file_name = os.path.split(path)[1]
                #if the ms is in the processed dir (additional check)
                if (ms_file_name in file_list):
                    # save the path
                    processed_ms_paths.append(
                        os.path.join(processed_ms_dir, ms_file_name))
            #add the information the image
            try:
                addimg.addImagingInfo(awimager_output, processed_ms_paths,
                                      sourcedb, minbaseline, maxbaseline)

            except Exception as error:
                self.logger.warn("addImagingInfo Threw Exception:")
                self.logger.warn(error)
                # Catch raising of already done error: allows for rerunning
                # of the recipe
                if "addImagingInfo already done" in str(error):
                    pass
                else:
                    raise  # re-raise the original exception, preserving its traceback
                #The majority of the tables is updated correctly

            # ***************************************************************
            # 2. convert to hdf5 image format
            output_directory = None
            pim_image = pim.image(awimager_output)
            try:
                self.logger.info(
                    "Saving image in HDF5 Format to: {0}".format(output_image))
                # Create the output directory
                output_directory = os.path.dirname(output_image)
                create_directory(output_directory)
                # save the image
                pim_image.saveas(output_image, hdf5=True)

            except Exception as error:
                self.logger.error(
                    "Exception raised inside pyrap.images: {0}".format(
                        str(error)))
                raise error

            # Convert to fits
            # create target location
            fits_output = output_image + ".fits"
            # To allow reruns a possible earlier version needs to be removed!
            # image2fits fails if not done!!
            if os.path.exists(fits_output):
                os.unlink(fits_output)

            try:
                temp_dir = tempfile.mkdtemp(suffix=".%s" %
                                            (os.path.basename(__file__), ))
                with CatchLog4CPlus(
                        temp_dir, self.logger.name + '.' +
                        os.path.basename(awimager_output),
                        "image2fits") as logger:
                    catch_segfaults([
                        "image2fits", '-in', awimager_output, '-out',
                        fits_output
                    ], temp_dir, self.environment, logger)
            except Exception as excp:
                self.logger.error(str(excp))
                return 1
            finally:
                shutil.rmtree(temp_dir)

            # ****************************************************************
            # 3. Filling of the HDF5 root group
            command = [fillrootimagegroup_exec, output_image]
            self.logger.info(" ".join(command))
            #Spawn a subprocess and connect the pipes
            proc = subprocess.Popen(command,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)

            (stdoutdata, stderrdata) = communicate_returning_strings(proc)

            exit_status = proc.returncode
            self.logger.info(stdoutdata)
            self.logger.info(stderrdata)

            # if filling the HDF5 root group failed, log it and bail out
            if exit_status != 0:
                self.logger.error(
                    "Error using the fillRootImageGroup command; "
                    "see above lines. Exit status: {0}".format(exit_status))

                return 1

            # *****************************************************************
            # 4 Export the fits image to the msss server
            url = "http://tanelorn.astron.nl:8000/upload"
            try:
                self.logger.info(
                    "Starting upload of fits image data to server!")
                opener = urllib.request.build_opener(mph.MultipartPostHandler)
                filedata = {"file": open(fits_output, "rb")}
                opener.open(url, filedata, timeout=2)

                # HTTPError needs to be caught first.
            except urllib.error.HTTPError as httpe:
                self.logger.warn("HTTP status is: {0}".format(httpe.code))
                self.logger.warn("failed exporting fits image to server")

            except urllib.error.URLError as urle:
                self.logger.warn(str(urle.reason))
                self.logger.warn("failed exporting fits image to server")

            except Exception as exc:
                self.logger.warn(str(exc))
                self.logger.warn("failed exporting fits image to server")

            # *****************************************************************
            # 5. export the sourcelist to the msss server
            url = "http://tanelorn.astron.nl:8000/upload_srcs"
            try:
                # Copy file to output location
                new_sourcelist_path = output_image + ".sourcelist"
                if os.path.exists(new_sourcelist_path):
                    os.unlink(new_sourcelist_path)

                shutil.copy(sourcelist, new_sourcelist_path)
                self.logger.info(
                    "Starting upload of sourcelist data to server!")
                opener = urllib.request.build_opener(mph.MultipartPostHandler)
                filedata = {"file": open(new_sourcelist_path, "rb")}
                opener.open(url, filedata, timeout=2)

                # HTTPError needs to be caught first.
            except urllib.error.HTTPError as httpe:
                self.logger.warn("HTTP status is: {0}".format(httpe.code))
                self.logger.warn("failed exporting sourcelist to server")

            except urllib.error.URLError as urle:
                self.logger.warn(str(urle.reason))
                self.logger.warn("failed exporting sourcelist image to server")

            except Exception as exc:
                self.logger.warn(str(exc))
                self.logger.warn("failed exporting sourcelist image to serve")

            self.outputs["hdf5"] = "succes"
            self.outputs["image"] = output_image

        return 0