def download(self, remote_path, local_path):
    """Fetch a file from the virtual machine through the shared folder.

    The file is first copied, inside the machine, to a uniquely-named
    temporary file under ``/vagrant`` (chown'ed to ``vagrant`` and made
    mode 644); the file of the same name under ``self.target`` is then
    renamed to `local_path`.

    :param remote_path: path of the file in the machine; it is re-rooted
        under ``/experimentroot`` when ``self.use_chroot`` is set.
    :param local_path: destination path on the host.
    :returns: True on success, False if the copy in the machine or the
        final rename failed (the failure is logged).
    """
    if self.use_chroot:
        remote_path = join_root(PosixPath('/experimentroot'), remote_path)
    temp = make_unique_name(b'reprozip_output_')
    rtemp = PosixPath('/vagrant') / temp
    ltemp = self.target / temp

    # Copy file to shared folder
    logging.info("Copying file to shared folder...")
    cp_cmd = '/bin/cp %s %s' % (
        shell_escape(remote_path.path),
        shell_escape(rtemp.path))
    chown_cmd = '/bin/chown vagrant %s' % shell_escape(rtemp.path)
    chmod_cmd = '/bin/chmod 644 %s' % shell_escape(rtemp.path)
    chan = self.ssh.get_transport().open_session()
    try:
        chan.exec_command('/usr/bin/sudo /bin/sh -c %s' % shell_escape(
            ' && '.join((cp_cmd, chown_cmd, chmod_cmd))))
        status = chan.recv_exit_status()
    finally:
        # Always release the channel, whatever the outcome
        chan.close()
    if status != 0:
        logging.critical("Couldn't copy file in virtual machine")
        try:
            ltemp.remove()
        except OSError:
            pass
        return False

    # Move file to final destination
    try:
        ltemp.rename(local_path)
    except OSError as e:
        logging.critical("Couldn't download output file: %s\n%s",
                         remote_path, str(e))
        # Best-effort cleanup: a failure here must not hide the error
        # that was just reported
        try:
            ltemp.remove()
        except OSError:
            pass
        return False
    return True
def upload_file(self, local_path, input_path):
    """Upload a local file over SCP and move it over an input file.

    The file is sent to a uniquely-named temporary file under ``/tmp``,
    given the owner and mode of the existing file at the destination
    (``--reference``), then moved into place with sudo.

    :param local_path: path object of the file to send from the host.
    :param input_path: destination path in the machine; it is re-rooted
        under ``/experimentroot`` when ``self.use_chroot`` is set.

    Exits the process with status 1 if the remote command fails.
    """
    # Coerce in both modes: every use below reads ``remote_path.path``,
    # so a plain string must work without chroot as well
    remote_path = PosixPath(input_path)
    if self.use_chroot:
        remote_path = join_root(PosixPath('/experimentroot'), remote_path)

    # Upload to a temporary file first
    logging.info("Uploading file via SCP...")
    rtemp = PosixPath(make_unique_name(b'/tmp/reprozip_input_'))
    self.client_scp.put(local_path.path, rtemp.path, recursive=False)

    # Move it
    logging.info("Moving file into place...")
    chown_cmd = '/bin/chown --reference=%s %s' % (
        shell_escape(remote_path.path),
        shell_escape(rtemp.path))
    chmod_cmd = '/bin/chmod --reference=%s %s' % (
        shell_escape(remote_path.path),
        shell_escape(rtemp.path))
    mv_cmd = '/bin/mv %s %s' % (
        shell_escape(rtemp.path),
        shell_escape(remote_path.path))
    chan = self.ssh.get_transport().open_session()
    try:
        # The commands are chained with ';', so the status read below is
        # the one of the last command (the mv)
        chan.exec_command('/usr/bin/sudo /bin/sh -c %s' % shell_escape(
            ';'.join((chown_cmd, chmod_cmd, mv_cmd))))
        status = chan.recv_exit_status()
    finally:
        chan.close()
    if status != 0:
        logging.critical("Couldn't move file in virtual machine")
        sys.exit(1)
def remove_data_prefix(self, path):
    """Return `path` with its first component dropped.

    Anything that is not already a PosixPath is converted to one first.
    When nothing is left after dropping the first component, a path built
    from the empty string is returned. The result has the same class as
    the (converted) input.
    """
    posix = path if isinstance(path, PosixPath) else PosixPath(path)
    cls = posix.__class__
    remaining = posix.components[1:]
    if remaining:
        return cls(*remaining)
    return cls('')
def upload_file(self, local_path, input_path):
    """Replace an input file in the machine, going through the shared folder.

    `local_path` is copied to a uniquely-named file under ``self.target``;
    the file of the same name under ``/vagrant`` is then given the owner
    and mode of the destination (``--reference``) and moved over it with
    sudo. When ``self.use_chroot`` is set the destination is re-rooted
    under ``/experimentroot``.

    Exits the process with status 1 if the remote command fails, after
    trying to remove the temporary copy.
    """
    destination = input_path
    if self.use_chroot:
        destination = join_root(PosixPath('/experimentroot'), input_path)

    unique = make_unique_name(b'reprozip_input_')
    host_copy = self.target / unique
    guest_copy = PosixPath('/vagrant') / unique

    # Copy file to shared folder
    logging.info("Copying file to shared folder...")
    local_path.copyfile(host_copy)

    # Move it
    logging.info("Moving file into place...")
    session = self.ssh.get_transport().open_session()
    quoted_dest = shell_escape(destination.path)
    steps = [
        '/bin/chown --reference=%s %s' % (quoted_dest,
                                          shell_escape(guest_copy.path)),
        '/bin/chmod --reference=%s %s' % (quoted_dest,
                                          shell_escape(guest_copy.path)),
        '/bin/mv %s %s' % (shell_escape(guest_copy.path), quoted_dest),
    ]
    script = ' && '.join(steps)
    session.exec_command('/usr/bin/sudo /bin/sh -c %s' % shell_escape(script))
    if session.recv_exit_status() != 0:
        logging.critical("Couldn't move file in virtual machine")
        try:
            host_copy.remove()
        except OSError:
            pass
        sys.exit(1)
    session.close()
def get_data(self, path):
    """Returns a tarfile.TarInfo object for the data path.

    The path is looked up under the ``DATA`` prefix in ``self.data`` and a
    shallow copy of the member is returned.

    Raises KeyError if no such path exists.
    """
    member_path = join_root(PosixPath(b'DATA'), PosixPath(path))
    member = self.data.getmember(member_path)
    return copy.copy(member)
def normalize_path(path):
    """Normalize a path obtained from the database.

    Returns a PosixPath; if the path still starts with two separators
    after construction, the first one is dropped.
    """
    normalized = PosixPath(path)
    # For some reason, os.path.normpath() keeps multiple leading slashes
    # We don't want this since it has no meaning on Linux
    doubled = normalized._sep * 2
    if not normalized.path.startswith(doubled):
        return normalized
    return PosixPath(normalized.path[1:])
def test_plus(self):
    """Tests the plus operator."""
    # Appending a plain suffix extends the last component
    with_suffix = PosixPath('some/file.txt') + '.bak'
    self.assertEqual(with_suffix.path, b'some/file.txt.bak')

    # Adding another path object is a type error
    with self.assertRaises(TypeError):
        PosixPath('some/file.txt') + PosixPath('.bak')

    # Suffixes containing a separator are rejected
    for bad_suffix in ('.bak/kidding', '/backup'):
        with self.assertRaises(ValueError):
            PosixPath('some/file.txt') + bad_suffix
def test_parts(self):
    """Tests parent, ancestor, name, stem, ext."""
    check = self.assertEqual
    rel = PosixPath(b'directory/users/r\xE9mi/file.txt')
    abso = PosixPath(u'/some/other/thing.h\xE9h\xE9')

    # Parents and ancestors
    check(rel.parent.path, b'directory/users/r\xE9mi')
    check(abso.parent.path, b'/some/other')
    check(abso.ancestor(10).path, b'/')

    # Last component, as bytes and as unicode
    check(rel.name, b'file.txt')
    check(abso.name, b'thing.h\xC3\xA9h\xC3\xA9')
    check(abso.unicodename, u'thing.h\xE9h\xE9')

    # Stem and extension
    check(abso.stem, b'thing')
    check(abso.ext, b'.h\xC3\xA9h\xC3\xA9')
def test_str(self):
    """Tests getting string representations (repr/bytes/unicode)."""
    check = self.assertEqual
    valid_utf8 = PosixPath(b'/tmp/r\xC3\xA9mi')
    invalid_utf8 = PosixPath(b'/tmp/r\xE9mi')

    # repr()
    check(repr(valid_utf8), "PosixPath(b'/tmp/r\\xc3\\xa9mi')")
    check(repr(invalid_utf8), "PosixPath(b'/tmp/r\\xe9mi')")

    # bytes()
    check(bytes(valid_utf8), b'/tmp/r\xC3\xA9mi')
    check(bytes(invalid_utf8), b'/tmp/r\xE9mi')

    # unicode(): the undecodable byte is expected as U+FFFD
    check(unicode(valid_utf8), u'/tmp/r\xE9mi')
    check(unicode(invalid_utf8), u'/tmp/r\uFFFDmi')
def test_comparisons(self):
    """Tests the comparison operators."""
    is_true = self.assertTrue
    is_false = self.assertFalse

    # Equality, including against plain strings with a different case
    is_true(WindowsPath('\\tmp') == WindowsPath('\\tmp'))
    is_false(WindowsPath('C:\\file') != 'c:\\FILE')
    is_true('c:\\FILE' == WindowsPath('C:\\file'))
    is_false(WindowsPath('C:\\file') == WindowsPath('C:\\dir'))

    # Paths of different flavors never compare equal
    is_false(WindowsPath('some/file') == PosixPath('some/file'))

    # Ordering against plain strings
    is_true(WindowsPath('path/to/file1') < 'path/to/file2')
    is_false('path/to/file1' >= WindowsPath('path/to/file2'))

    # Ordering across flavors is only checked on Python 3
    if PY3:
        with self.assertRaises(TypeError):
            WindowsPath('some/file') < PosixPath('other/file')
def _setup(self):
    """Actually installs the runtime.

    Resolves ``self.queue`` to a path on the server, picks the runtime
    (``self.setup_runtime`` if set, otherwise 'pbs' when ``which qsub``
    succeeds and 'default' when it doesn't), uploads the runtime files and
    runs their ``commands/setup`` script.

    Raises ValueError if the selected runtime is not in
    ``self.need_runtime``. Stores the resolved queue in ``self._queue``
    and returns it.
    """
    # Expands ~user in queue
    first = self.queue.path[0:1]
    if first == b'/':
        queue = self.queue
    else:
        if first == b'~':
            # Let the remote shell expand the ~user part
            expanded = self.check_output(
                'echo %s' % escape_queue(self.queue))
            queue = PosixPath(expanded.rstrip(b'\r\n'))
        else:
            # Relative path: anchor it to the remote working directory
            cwd = self.check_output('pwd')
            queue = PosixPath(cwd.rstrip(b'\r\n')) / self.queue
        logger.debug("Resolved to %s", queue)

    # Select runtime
    if self.setup_runtime:
        runtime = self.setup_runtime
    elif self._call('which qsub', False)[0] == 0:
        # Autoselect
        logger.debug("qsub is available, using runtime 'pbs'")
        runtime = 'pbs'
    else:
        logger.debug("qsub not found, using runtime 'default'")
        runtime = 'default'

    allowed = self.need_runtime
    if allowed is not None and runtime not in allowed:
        raise ValueError("About to setup runtime %s but that wouldn't "
                         "match explicitely allowed runtimes" % runtime)

    auto_marker = "" if self.setup_runtime else " (auto)"
    logger.info("Installing runtime %s%s at %s",
                runtime, auto_marker, self.queue)

    # Uploads runtime
    scp_client = self.get_scp_client()
    runtime_files = pkg_resources.resource_filename(
        'tej', 'remotes/%s' % runtime)
    scp_client.put(runtime_files, str(queue), recursive=True)
    logger.debug("Files uploaded")

    # Runs post-setup script
    setup_script = queue / 'commands/setup'
    self.check_call('/bin/sh %s' % shell_escape(setup_script))
    logger.debug("Post-setup script done")

    self._queue = queue
    return queue
def json(self, process_map):
    """Build the dictionary describing this process.

    `process_map` is indexed with ``self.parent`` to fill the 'parent'
    entry, which is a ``[mapped_parent, reason]`` list where reason is
    "fork", "exec" or "fork+exec" depending on ``self.created``, or None
    when the process has no parent. 'reads' and 'writes' start out empty.
    """
    pid = self.pid
    name = "%d" % pid

    # Short label: last component of the binary, or "-" if there is none
    if self.binary:
        short_binary = PosixPath(self.binary).components[-1]
    else:
        short_binary = "-"
    long_name = "%s (%d)" % (short_binary, pid)
    description = "%s\n%d" % (self.binary, pid)

    if self.parent is None:
        parent = None
    else:
        how = self.created
        if how == C_FORK:
            reason = "fork"
        elif how == C_EXEC:
            reason = "exec"
        elif how == C_FORKEXEC:
            reason = "fork+exec"
        else:
            assert False
        parent = [process_map[self.parent], reason]

    return {'name': name,
            'parent': parent,
            'reads': [],
            'writes': [],
            'long_name': long_name,
            'description': description}
def __init__(self, pack):
    """Open a pack file and detect its format version.

    Reads ``METADATA/version`` from the archive, which has to start with
    ``REPROZIP VERSION `` followed by the version number. For version 1
    the data is read from the pack itself; for version 2 it is read from
    the ``DATA.tar.gz`` member.

    :param pack: location of the pack file.
    :raises ValueError: if the version file doesn't have the expected
        prefix, or the version is not 1 or 2.

    The archive is closed again if anything goes wrong, so a failed
    construction does not leave the file open.
    """
    self.pack = Path(pack)
    self.tar = tarfile.open(str(self.pack), 'r:*')
    try:
        f = self.tar.extractfile('METADATA/version')
        try:
            version = f.read()
        finally:
            f.close()

        if not version.startswith(b'REPROZIP VERSION '):
            raise ValueError("File doesn't appear to be a RPZ pack")
        try:
            # 17 == len(b'REPROZIP VERSION ')
            version = int(version[17:].rstrip())
        except ValueError:
            version = None
        if version not in (1, 2):
            raise ValueError(
                "Unknown format version %r (maybe you should upgrade "
                "reprounzip? I only know versions 1 and 2" % version)
        self.version = version
        self.data_prefix = PosixPath(b'DATA')

        if version == 1:
            self.data = self.tar
        else:  # version == 2
            self.data = tarfile.open(
                fileobj=self.tar.extractfile('DATA.tar.gz'),
                mode='r:*')
    except BaseException:
        # Don't leak the open archive when the pack is rejected
        self.tar.close()
        raise
def test_rel_path_to(self):
    """Tests the rel_path_to method."""
    # (starting point, destination, expected relative path)
    cases = [
        (u'\\var\\log\\apache2\\',
         u'\\var\\www\\cat.jpg',
         u'..\\..\\www\\cat.jpg'),
        (u'C:\\var\\log\\apache2\\',
         u'C:\\tmp\\access.log',
         u'..\\..\\..\\tmp\\access.log'),
        (u'var\\log',
         u'var\\log\\apache2\\access.log',
         u'apache2\\access.log'),
        (u'\\var\\log\\apache2',
         u'\\var\\log\\apache2',
         u'.'),
        (u'C:\\',
         u'C:\\var\\log\\apache2\\access.log',
         u'var\\log\\apache2\\access.log'),
        (u'\\tmp\\secretdir\\',
         u'\\',
         u'..\\..'),
        # Different drive: the destination comes back unchanged
        (u'C:\\tmp\\secretdir\\',
         u'D:\\other\\file.txt',
         u'D:\\other\\file.txt'),
    ]
    for start, dest, expected in cases:
        self.assertEqual(WindowsPath(start).rel_path_to(dest).path,
                         expected)

    # Mixing path flavors is an error
    with self.assertRaises(TypeError):
        WindowsPath(u'C:\\mydir\\').rel_path_to(PosixPath('/tmp/file'))
def extract_original_input(self, input_name, input_path, temp):
    """Extract the packed version of an input file to `temp`.

    The member for `input_path` is looked up under the ``DATA`` prefix of
    ``experiment.rpz`` in ``self.target``, renamed to the name of `temp`
    and extracted into the parent directory of `temp`.

    :param input_name: name of the input (not used here).
    :param input_path: path of the input file inside the experiment.
    :param temp: path object where the file should be written.
    :returns: `temp`.
    :raises KeyError: if the archive has no such member.

    The archive is closed even when the lookup or the extraction fails.
    """
    tar = tarfile.open(str(self.target / 'experiment.rpz'), 'r:*')
    try:
        member = tar.getmember(str(join_root(PosixPath('DATA'),
                                             input_path)))
        # Rename the member so it gets written under the name of `temp`
        member.name = str(temp.name)
        tar.extract(member, str(temp.parent))
    finally:
        tar.close()
    return temp
def load_iofiles(config, runs):
    """Loads the inputs_outputs part of the configuration.

    This tests for duplicates, merge the lists of executions, and
    optionally loads from the runs for reprozip < 0.7 compatibility.

    :param config: configuration dictionary; its 'inputs_outputs' entry is
        a list of dictionaries with keys 'name' and 'path' and optionally
        'read_by_runs' and 'written_by_runs'.
    :param runs: list of run dictionaries; when `config` has no
        'inputs_outputs' key, their 'input_files' and 'output_files'
        entries are popped and used instead.
    :returns: dictionary mapping each file name to an InputOutputFile.
    :raises InvalidConfig: if an entry has unknown keys or lacks a
        required one.
    """
    files_list = config.get('inputs_outputs') or []

    # reprozip < 0.7 compatibility: read input_files and output_files from runs
    if 'inputs_outputs' not in config:
        for i, run in enumerate(runs):
            for rkey, wkey in (('input_files', 'read_by_runs'),
                               ('output_files', 'written_by_runs')):
                for k, p in iteritems(run.pop(rkey, {})):
                    files_list.append({'name': k,
                                       'path': p,
                                       wkey: [i]})

    files = {}  # name:str: InputOutputFile
    paths = {}  # path:PosixPath: name:str
    required_keys = set(['name', 'path'])
    optional_keys = set(['read_by_runs', 'written_by_runs'])
    uniquenames = UniqueNames()
    for i, f in enumerate(files_list):
        keys = set(f)
        if (not keys.issubset(required_keys | optional_keys) or
                not keys.issuperset(required_keys)):
            # Report which entry is at fault (the index used to be left
            # out, so the message showed a literal "%d")
            raise InvalidConfig("File #%d has invalid keys" % i)
        name = f['name']
        if name.startswith('/'):
            logging.warning(
                "File name looks like a path: %s, prefixing with "
                ".", name)
            name = '.%s' % name
        path = PosixPath(f['path'])
        readers = sorted(f.get('read_by_runs', []))
        writers = sorted(f.get('written_by_runs', []))
        if name in files:
            # Same name, different file: pick a fresh name for this one
            if files[name].path != path:
                old_name, name = name, uniquenames(name)
                logging.warning(
                    "File name appears multiple times: %s\n"
                    "Using name %s instead",
                    old_name, name)
        else:
            uniquenames.insert(name)
        if path in paths:
            # Path already known: merge the runs into the existing entry
            if paths[path] == name:
                logging.warning("File appears multiple times: %s", name)
            else:
                logging.warning(
                    "Two files have the same path (but different "
                    "names): %s, %s\nUsing name %s",
                    name, paths[path], paths[path])
                name = paths[path]
            files[name].read_runs.update(readers)
            files[name].write_runs.update(writers)
        else:
            paths[path] = name
            files[name] = InputOutputFile(path, readers, writers)

    return files
def extract_original_input(self, input_name, input_path, temp):
    """Extract the packed version of an input file to `temp`.

    The member for `input_path` is looked up in ``inputs.tar.gz`` in
    ``self.target``; a copy of it is renamed to the last component of
    `temp` and extracted into the parent directory of `temp`.

    :param input_name: name of the input (not used here).
    :param input_path: path of the input file inside the experiment.
    :param temp: path object where the file should be written.
    :returns: `temp`.
    :raises KeyError: if the archive has no such member.

    The archive is closed even when the lookup or the extraction fails.
    """
    tar = tarfile.open(str(self.target / 'inputs.tar.gz'), 'r:*')
    try:
        member = tar.getmember(str(join_root(PosixPath(''), input_path)))
        # Work on a copy so the archive's own member entry keeps its name
        member = copy.copy(member)
        member.name = str(temp.components[-1])
        tar.extract(member, str(temp.parent))
    finally:
        tar.close()
    return temp
def _resolve_queue(self, queue, depth=0, links=None):
    """Finds the location of tej's queue directory on the server.

    The `queue` set when constructing this `RemoteQueue` might be relative
    to the home directory and might contain ``~user`` placeholders. Also,
    each queue may in fact be a link to another path (a file containing
    the string ``tejdir:``, a space, and a new pathname, relative to this
    link's location).

    :param queue: path to examine.
    :param depth: number of links followed so far.
    :param links: optional list; every link encountered along the chain
        is appended to it.
    :returns: ``(path, depth)`` where `path` is the queue directory, or
        None if `queue` (or the end of the chain) doesn't exist.
    :raises QueueExists: if the directory uses another protocol version,
        or a runtime that is not in ``self.need_runtime``.
    :raises RemoteCommandFailure: if the server's answer is not
        understood.
    """
    if depth == 0:
        logger.debug("resolve_queue(%s)", queue)
    answer = self.check_output(
        'if [ -d %(queue)s ]; then '
        ' cd %(queue)s; echo "dir"; cat version; pwd; '
        'elif [ -f %(queue)s ]; then '
        ' cat %(queue)s; '
        'else '
        ' echo no; '
        'fi' % {
            'queue': escape_queue(queue)})
    if answer == b'no':
        if depth > 0:
            logger.debug("Broken link at depth=%d", depth)
        else:
            logger.debug("Path doesn't exist")
        return None, depth
    elif answer.startswith(b'dir\n'):
        # After the "dir" marker come the lines of the version file, which
        # are unpacked as protocol version then runtime, then the output
        # of pwd
        version, runtime, path = answer[4:].split(b'\n', 2)
        try:
            version = tuple(
                int(e)
                for e in version.decode('ascii', 'ignore').split('.'))
        except ValueError:
            version = 0, 0
        if version[:2] != self.PROTOCOL_VERSION:
            raise QueueExists(
                msg="Queue exists and is using incompatible protocol "
                    "version %s" % '.'.join('%s' % e for e in version))
        path = PosixPath(path)
        runtime = runtime.decode('ascii', 'replace')
        if (self.need_runtime is not None and
                runtime not in self.need_runtime):
            raise QueueExists(
                msg="Queue exists and is using explicitely disallowed "
                    "runtime %s" % runtime)
        logger.debug("Found directory at %s, depth=%d, runtime=%s",
                     path, depth, runtime)
        return path, depth
    elif answer.startswith(b'tejdir: '):
        # The target is relative to the location of the link itself
        new = queue.parent / answer[8:]
        logger.debug("Found link to %s, recursing", new)
        if links is not None:
            links.append(queue)
        # Pass `links` along so that every link of the chain is recorded,
        # not just the first one
        return self._resolve_queue(new, depth + 1, links)
    else:  # pragma: no cover
        logger.debug("Server returned %r", answer)
        raise RemoteCommandFailure(msg="Queue resolution command failed "
                                       "in unexpected way")
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory. For a
    directory, each input file is followed by the file that was assigned
    to it, or "(original)" if there is none.

    Exits with status 1 if ``args.pack[0]`` doesn't exist.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    def list_files(run_list, title, key, annotate=None):
        # Prints one section, with a header per run if there are several
        print(title)
        several = len(run_list) > 1
        for index, run in enumerate(run_list):
            if several:
                print(" Run %d:" % index)
            for file_name, file_path in iteritems(run[key]):
                print(" %s (%s)" % (file_name, file_path))
                if annotate is not None:
                    print(" %s" % annotate(file_name))

    if pack.is_dir():
        # Reads info from an unpacked directory
        runs, packages, other_files = load_config_file(pack / 'config.yml',
                                                       canonical=True)
        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        input_files = unpacked_info.get('input_files', {})

        def assigned_file(file_name):
            replacement = input_files.get(file_name)
            if replacement is None:
                return "(original)"
            return PosixPath(replacement)

        list_files(runs, "Input files:", 'input_files', assigned_file)
        list_files(runs, "Output files:", 'output_files')
    else:  # pack.is_file()
        # Reads info from a pack file
        runs, packages, other_files = load_config(pack)

        list_files(runs, "Input files:", 'input_files')
        list_files(runs, "Output files:", 'output_files')
def data_filenames(self):
    """Returns a set of filenames for all the data paths.

    Those paths begin with a slash / and the 'DATA' prefix has been
    removed. Members of ``self.data`` whose name doesn't start with
    'DATA/' are left out.
    """
    filenames = set()
    for member in self.data.getmembers():
        if not member.name.startswith('DATA/'):
            continue
        # Drop the 4 characters of 'DATA', keeping the slash
        filenames.add(PosixPath(member.name[4:]))
    return filenames
def test_construct(self):
    """Tests building paths."""
    # Mixed components: unicode, bytes and another WindowsPath
    assembled = WindowsPath(u'C:\\',
                            WindowsPath('some/dir'),
                            u'with',
                            'files.txt')
    self.assertEqual(assembled.path, u'C:\\some\\dir\\with\\files.txt')

    # A PosixPath can't be used as a component
    with self.assertRaises(TypeError):
        WindowsPath(WindowsPath('C:\\somedir'), PosixPath('file.sh'))

    # Joining with the / operator
    joined = (WindowsPath(u'Users\\R\xE9mi/Desktop') /
              WindowsPath(b'pictures/m\xE9chant.jpg'))
    self.assertEqual(joined.path,
                     u'Users\\R\xE9mi\\Desktop\\pictures\\m\xE9chant.jpg')
def test_root(self):
    """Tests roots."""
    rel_bytes = PosixPath(b'some/relative/path')
    rel_unicode = PosixPath(u'alsorelative')
    absolute = PosixPath(b'/this/is/absolute')
    slash = PosixPath(u'/')

    def root_and_rest(p):
        # Raw paths of both halves of split_root()
        return tuple(part.path for part in p.split_root())

    # FIXME : This behaves weirdly because of normpath(). Do we want this?
    self.assertEqual(root_and_rest(rel_bytes),
                     (b'.', b'some/relative/path'))
    self.assertEqual(root_and_rest(rel_unicode), (b'.', b'alsorelative'))
    self.assertFalse(rel_unicode.is_absolute)

    self.assertEqual(root_and_rest(absolute), (b'/', b'this/is/absolute'))
    self.assertTrue(absolute.is_absolute)

    self.assertEqual(root_and_rest(slash), (b'/', b'.'))
    self.assertTrue(slash.is_absolute)
    self.assertEqual(slash.root.path, b'/')
def test_comparisons(self):
    """Tests the comparison operators."""
    is_true = self.assertTrue
    is_false = self.assertFalse

    # Equality against bytes and other paths (case matters)
    is_true(PosixPath(b'/tmp/r\xE9mi') == b'/tmp/r\xE9mi')
    is_true(PosixPath(b'/file') != b'/FILE')
    is_false(PosixPath(b'file') == PosixPath(b'dir'))

    # Paths of different flavors never compare equal
    is_false(WindowsPath('some/file') == PosixPath('some/file'))

    # Ordering against bytes
    is_true(PosixPath(b'path/to/file1') < b'path/to/file2')
    is_false(b'path/to/file1' >= PosixPath(b'path/to/file2'))

    # Ordering across flavors is only checked on Python 3
    if PY3:
        with self.assertRaises(TypeError):
            WindowsPath('some/file') < PosixPath('other/file')
def download(self, remote_path, local_path):
    """Fetch a single file from the machine over SCP.

    `remote_path` is re-rooted under ``/experimentroot`` when
    ``self.use_chroot`` is set. If the transfer raises an SCPException,
    the error is logged and the process exits with status 1.
    """
    source = remote_path
    if self.use_chroot:
        source = join_root(PosixPath('/experimentroot'), source)

    try:
        self.client_scp.get(source.path, local_path.path, recursive=False)
    except scp.SCPException as err:
        logging.critical("Couldn't download output file: %s\n%s",
                         source, str(err))
        sys.exit(1)
def extract_original_input(self, input_name, input_path, temp):
    """Extract the packed version of an input file to `temp`.

    The member for `input_path` is looked up under the ``DATA`` prefix of
    the ``self.data_tgz`` archive in ``self.target``; a copy of it is
    renamed to the last component of `temp` and extracted into the parent
    directory of `temp`.

    :param input_name: name of the input (not used here).
    :param input_path: path of the input file inside the experiment.
    :param temp: path object where the file should be written.
    :returns: `temp`, or None if the archive has no such member.

    The archive is closed on every path, including the "not found" one
    and when the extraction fails.
    """
    tar = tarfile.open(str(self.target / self.data_tgz), 'r:*')
    try:
        try:
            member = tar.getmember(
                str(join_root(PosixPath('DATA'), input_path)))
        except KeyError:
            return None
        # Work on a copy so the archive's own member entry keeps its name
        member = copy.copy(member)
        member.name = str(temp.components[-1])
        tar.extract(member, str(temp.parent))
    finally:
        tar.close()
    return temp
def status(self, job_id):
    """Gets the status of a previously-submitted job.

    Runs the queue's ``commands/status`` script and interprets its exit
    code: 0 returns ``(JOB_DONE, directory, result)``, 2 returns
    ``(JOB_RUNNING, directory, None)``, 3 raises JobNotFound and anything
    else raises RemoteCommandFailure.

    Raises QueueDoesntExist if there is no queue.
    """
    check_jobid(job_id)

    queue = self._get_queue()
    if queue is None:
        raise QueueDoesntExist

    command = '%s %s' % (shell_escape(queue / 'commands/status'), job_id)
    ret, output = self._call(command, True)

    if ret == 0:
        # Exactly two lines: the job directory, then the result
        directory, result = output.splitlines()
        return (RemoteQueue.JOB_DONE,
                PosixPath(directory),
                result.decode('utf-8'))
    if ret == 2:
        first_line = output.splitlines()[0]
        return RemoteQueue.JOB_RUNNING, PosixPath(first_line), None
    if ret == 3:
        raise JobNotFound
    raise RemoteCommandFailure(command="commands/status", ret=ret)
def submit(self, job_id, directory, script=None):
    """Submits a job to the queue.

    If the runtime is not there, it will be installed. If it is a broken
    chain of links, error.

    When `job_id` is None, one is built from the name of `directory`, the
    destination's username and a unique name. `script` defaults to
    'start.sh'. If the upload fails, deleting the job is attempted before
    the error is propagated.

    Raises JobAlreadyExists if the ``new_job`` command exits with 4, and
    JobNotFound for any other non-zero status. Returns the job identifier.
    """
    if job_id is not None:
        check_jobid(job_id)
    else:
        job_id = '%s_%s_%s' % (Path(directory).unicodename,
                               self.destination['username'],
                               make_unique_name())

    queue = self._get_queue()
    if queue is None:
        queue = self._setup()

    if script is None:
        script = 'start.sh'

    # Create directory
    new_job_cmd = '%s %s' % (shell_escape(queue / 'commands/new_job'),
                             job_id)
    ret, target = self._call(new_job_cmd, True)
    if ret == 4:
        raise JobAlreadyExists
    elif ret != 0:
        raise JobNotFound("Couldn't create job")
    target = PosixPath(target)
    logger.debug("Server created directory %s", target)

    # Upload to directory
    try:
        scp_client = self.get_scp_client()
        scp_client.put(str(Path(directory)),
                       str(target),
                       recursive=True)
    except BaseException as e:
        # Try to remove the half-created job; whether or not that works,
        # it is the upload error that gets raised
        try:
            self.delete(job_id)
        except BaseException:
            raise e
        raise
    logger.debug("Files uploaded")

    # Submit job
    submit_cmd = '%s %s %s %s' % (shell_escape(queue / 'commands/submit'),
                                  job_id,
                                  shell_escape(target),
                                  shell_escape(script))
    self.check_call(submit_cmd)
    logger.info("Submitted job %s", job_id)
    return job_id
def test_construct(self):
    """Tests building paths."""
    # Mixed components: str, bytes and another PosixPath
    assembled = PosixPath('/',
                          PosixPath(b'r\xE9mis/dir'),
                          'with',
                          'files.txt')
    self.assertEqual(assembled.path, b'/r\xE9mis/dir/with/files.txt')

    # A WindowsPath can't be used as a component
    with self.assertRaises(TypeError):
        PosixPath('/tmp/test', WindowsPath('folder'), 'cat.gif')

    # Joining with the / operator
    joined = PosixPath(b'/tmp/dir') / PosixPath('r\xE9mis/files/')
    self.assertEqual(joined.path, b'/tmp/dir/r\xC3\xA9mis/files')

    if PY3:
        self.assertEqual(PosixPath('/tmp/r\uDCE9mi').path,
                         b'/tmp/r\xE9mi')

    # Joining with an absolute path gives that absolute path
    rejoined = PosixPath(b'/home/test') / PosixPath('/var/log')
    self.assertEqual(rejoined.path, b'/var/log')
def filefilter(path):
    """Apply the ignore, replace and aggregate rules to a path.

    Uses the `ignore`, `replace` and `aggregates` collections from the
    enclosing scope:

    * if any `ignore` predicate matches, the path is dropped;
    * otherwise each `replace` function is applied in turn;
    * then the first `aggregates` prefix the result starts with, if any,
      replaces it.

    :param path: the path to filter.
    :returns: None if the path is ignored, `path` itself when there are
        no replace or aggregate rules, otherwise a new PosixPath.
    """
    pathuni = unicode_(path)
    if any(f(pathuni) for f in ignore):
        logging.debug("IGN %s", pathuni)
        return None
    if not (replace or aggregates):
        return path
    # `replace` may be unset while `aggregates` is given; treat it as
    # empty, the same way `aggregates` is handled below
    for fi in replace or []:
        pathuni_ = fi(pathuni)
        if pathuni_ != pathuni:
            logging.debug("SUB %s -> %s", pathuni, pathuni_)
        pathuni = pathuni_
    for prefix in aggregates or []:
        if pathuni.startswith(prefix):
            logging.debug("AGG %s -> %s", pathuni, prefix)
            pathuni = prefix
            break
    return PosixPath(pathuni)
def run(self, files):
    """Download the requested output files.

    `files` is a list of ``name:local_path`` specifications. With no
    specification, the output files of every run are listed instead. An
    empty local path prints the file through ``download_and_print()``
    rather than saving it.

    Exits with status 1 on a malformed specification or an unknown output
    name; ``finalize()`` is called once ``prepare_download()`` has
    returned, whatever happens afterwards.
    """
    reprounzip.common.record_usage(download_files=len(files))
    runs = self.get_runs_from_config()

    # No argument: list all the output files and exit
    if not files:
        print("Output files:")
        several_runs = len(runs) > 1
        for index, run in enumerate(runs):
            if several_runs:
                print(" Run %d:" % index)
            for output_name in run['output_files']:
                print(" %s" % output_name)
        return

    self.prepare_download(files)

    # Get the path of each output file
    all_output_files = {}
    for run in runs:
        all_output_files.update(run['output_files'])

    try:
        # Download files
        for filespec in files:
            # Split on the first colon only; no colon at all is an error
            output_name, colon, local_path = filespec.partition(':')
            if not colon:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)

            try:
                remote_path = PosixPath(all_output_files[output_name])
            except KeyError:
                logging.critical("Invalid output file: %r", output_name)
                sys.exit(1)

            logging.debug("Downloading file %s", remote_path)
            if local_path:
                self.download(remote_path, Path(local_path))
            else:
                self.download_and_print(remote_path)
    finally:
        self.finalize()