def testrun(args):
    """testrun subcommand.

    Runs the command with the tracer using a temporary sqlite3 database, then
    reads it and dumps it out. Not really useful, except for debugging.
    """
    # Trace into a throwaway database that is always deleted afterwards
    fd, database = Path.tempfile(prefix='reprozip_', suffix='.sqlite3')
    os.close(fd)
    try:
        # args.arg0 optionally overrides the argv[0] seen by the program
        if args.arg0 is None:
            argv = args.cmdline
        else:
            argv = [args.arg0] + args.cmdline[1:]
        logging.debug("Starting tracer, binary=%r, argv=%r",
                      args.cmdline[0], argv)
        status = _pytracer.execute(args.cmdline[0], argv, database.path,
                                   args.verbosity)
        separator = (
            "\n\n-----------------------------------------------------------"
            "--------------------")
        print(separator)
        print_db(database)
        if status != 0:
            if status & 0x0100:
                # High byte set: the child was killed by a signal, whose
                # number is in the low byte
                print("\nWarning: program appears to have been terminated by "
                      "signal %d" % (status & 0xFF))
            else:
                print("\nWarning: program exited with non-zero code %d"
                      % status)
    finally:
        database.remove()
def check_out(self, ref):
    """Check out the given revision.

    Points HEAD at ``self.branch``, then makes the working directory match
    the tree at ``ref``: files not present in the tree are deleted,
    directories emptied by those deletions are removed, and the tree's
    contents are extracted over the working directory.

    :param ref: Git revision (commit-ish) to check out.
    """
    fd, temptar = Path.tempfile()
    os.close(fd)
    try:
        self._run(['symbolic-ref', 'HEAD', 'refs/heads/%s' % self.branch])
        # Creates an archive from the tree
        self._run(['archive', '--format=tar', '-o', temptar.path, ref])
        tar = tarfile.open(str(temptar), 'r')
        # List the files in the tree
        files = set(self.workdir / m.name for m in tar.getmembers())
        # Remove from the directory all the files that don't exist
        removed_files = False
        for path in self.workdir.recursedir(top_down=False):
            if path.is_file() and path not in files:
                logging.info("Removing file %s", path)
                path.remove()
                removed_files = True
            elif path.is_dir():
                # NOTE(review): a directory is only pruned if a file was
                # removed since the last pruned directory (removed_files
                # toggles); presumably this keeps pre-existing empty
                # directories intact — confirm against callers
                if not path.listdir() and removed_files:
                    logging.info("Removing empty directory %s", path)
                    path.rmdir()
                    removed_files = False
        # Replace all the files
        tar.extractall(str(self.workdir))
        tar.close()
    finally:
        # Always delete the temporary archive, even on error
        temptar.remove()
def setup_usage_report(name, version):
    """Sets up the usagestats module.

    Unpacks the bundled CA certificate to a temporary file (deleted at
    interpreter exit), configures the module-global ``_usage_report`` Stats
    object, and records whether the current working directory is
    ASCII-encodable.

    :param name: Program name, used in the opt-in/opt-out prompt strings.
    :param version: Program version, reported with the stats.
    """
    global _usage_report

    # Unpack CA certificate
    fd, certificate_file = Path.tempfile(prefix='rpz_stats_ca_', suffix='.pem')
    with certificate_file.open('wb') as fp:
        fp.write(usage_report_ca)
    os.close(fd)
    # The certificate must survive until upload time; remove it only when
    # the interpreter exits
    atexit.register(os.remove, certificate_file.path)

    _usage_report = usagestats.Stats(
        '~/.reprozip/usage_stats',
        usagestats.Prompt(enable='%s usage_report --enable' % name,
                          disable='%s usage_report --disable' % name),
        # REPROZIP_USAGE_URL overrides the default stats endpoint
        os.environ.get('REPROZIP_USAGE_URL',
                       'https://reprozip-stats.poly.edu/'),
        version='%s %s' % (name, version),
        unique_user_id=True,
        env_var='REPROZIP_USAGE_STATS',
        ssl_verify=certificate_file.path)
    # Record whether the cwd is plain ASCII (encoding issues are a common
    # source of bugs worth tracking)
    try:
        os.getcwd().encode('ascii')
    except (UnicodeEncodeError, UnicodeDecodeError):
        record_usage(cwd_ascii=False)
    else:
        record_usage(cwd_ascii=True)
def testrun(args):
    """testrun subcommand.

    Runs the command with the tracer using a temporary sqlite3 database, then
    reads it and dumps it out. Not really useful, except for debugging.
    """
    fd, database = Path.tempfile(prefix='reprozip_', suffix='.sqlite3')
    os.close(fd)
    try:
        # args.arg0 optionally overrides the argv[0] seen by the program
        argv = (args.cmdline if args.arg0 is None
                else [args.arg0] + args.cmdline[1:])
        logger.debug("Starting tracer, binary=%r, argv=%r",
                     args.cmdline[0], argv)
        exitcode = _pytracer.execute(args.cmdline[0], argv, database.path)
        print("\n\n-----------------------------------------------------------"
              "--------------------")
        print_db(database)
        if exitcode != 0:
            if exitcode & 0x0100:
                # High byte set: child terminated by the signal whose number
                # is in the low byte
                print("\nWarning: program appears to have been terminated by "
                      "signal %d" % (exitcode & 0xFF))
            else:
                print("\nWarning: program exited with non-zero code %d"
                      % exitcode)
        return exitcode
    finally:
        database.remove()
def with_trace(self):
    """Context manager that extracts the trace database to a temporary file.

    Yields the path of a temporary copy of the trace; the copy is removed
    when the context exits.
    """
    fd, tmp = Path.tempfile(prefix='reprounzip_')
    os.close(fd)
    # Fix: previously there was no try/finally, so the temporary file
    # leaked whenever extract_trace() or the caller's 'with' body raised
    try:
        self.extract_trace(tmp)
        yield tmp
    finally:
        tmp.remove()
def download_and_print(self, remote_path):
    """Downloads a remote file to a temporary location and dumps it to stdout.

    :param remote_path: Path of the file inside the experiment environment.
    """
    # Download to temporary file
    fd, temp = Path.tempfile(prefix='reprozip_output_')
    os.close(fd)
    # Fix: previously the temporary file leaked if download() or the copy
    # to stdout raised; always remove it
    try:
        self.download(remote_path, temp)
        # Output to stdout
        with temp.open('rb') as fp:
            copyfile(fp, stdout_bytes)
    finally:
        temp.remove()
def get_reprozip_ca_certificate():
    """Gets the ReproZip CA certificate filename.

    Unpacks the bundled CA certificate into a temporary ``.pem`` file that
    is deleted at interpreter exit, and returns its path.
    """
    fd, cert_path = Path.tempfile(prefix='rpz_stats_ca_', suffix='.pem')
    with cert_path.open('wb') as out:
        out.write(usage_report_ca)
    os.close(fd)
    # The file must outlive this call (SSL verification reads it later), so
    # only delete it when the process terminates
    atexit.register(os.remove, cert_path.path)
    return cert_path
def test_tempfile(self):
    """Tests tempfile: created file exists, is a regular absolute file,
    and is gone after remove()."""
    fd, f = Path.tempfile()
    os.close(fd)
    try:
        self.assertTrue(f.exists())
        self.assertTrue(f.is_file())
        # Fix: is_absolute is a method; the original asserted the bound
        # method object itself, which is always truthy, so the check could
        # never fail
        self.assertTrue(f.is_absolute())
    finally:
        f.remove()
    self.assertFalse(f.exists())
def download_and_print(self, remote_path):
    """Downloads a remote file and writes its content to stdout.

    :param remote_path: Path of the file inside the experiment environment.
    :return: True on success, False if the download failed.
    """
    # Download to temporary file
    fd, temp = Path.tempfile(prefix='reprozip_output_')
    os.close(fd)
    # Fix: the early 'return False' below used to leave the temporary file
    # behind; the finally clause now removes it on every path
    try:
        download_status = self.download(remote_path, temp)
        if download_status is not None and not download_status:
            return False
        # Output to stdout
        with temp.open('rb') as fp:
            copyfile(fp, stdout_bytes)
        return True
    finally:
        temp.remove()
def download_and_print(self, remote_path):
    """Downloads a remote file and streams its content to stdout in chunks.

    :param remote_path: Path of the file inside the experiment environment.
    """
    # Download to temporary file
    fd, temp = Path.tempfile(prefix='reprozip_output_')
    os.close(fd)
    try:
        self.download(remote_path, temp)
        # Output to stdout. The original duplicated the read/write before
        # the loop and stopped on a short read; a standard read-until-empty
        # loop is equivalent for regular files and has one copy of the logic
        with temp.open('rb') as fp:
            chunk = fp.read(1024)
            while chunk:
                sys.stdout.buffer.write(chunk)
                chunk = fp.read(1024)
    finally:
        # Fix: remove the temporary file even if download/printing raises
        temp.remove()
def process_connection_file(original):
    """Rewrites a Jupyter-style connection file to listen on all interfaces.

    Context-manager generator: yields ``(fixed_file, ports)`` where
    ``fixed_file`` is a temporary copy of ``original`` with ``ip`` set to
    0.0.0.0 and ``ports`` is the list of all ``*_port`` values. The
    temporary file is removed when the context exits.

    :param original: Path of the original connection file (JSON).
    """
    with original.open('r') as fp:
        data = json.load(fp)
    data['ip'] = '0.0.0.0'  # Kernel should listen on all interfaces
    ports = [value for key, value in data.items() if key.endswith('_port')]
    fd, fixed_file = Path.tempfile(suffix='.json')
    os.close(fd)
    # Fix: previously the yield was not protected, so the temporary file
    # leaked whenever the caller's 'with' body raised
    try:
        with fixed_file.open('w') as fp:
            json.dump(data, fp)
        yield fixed_file, ports
    finally:
        fixed_file.remove()
def do_dot_test(self, expected, **kwargs):
    """Generates a DOT graph from the test trace and compares it.

    :param expected: Expected DOT output as a string, or False if
        generation is expected to fail with UsageError.
    :param kwargs: Extra options forwarded to graph.generate().
    """
    # Reset the process id counter so generated ids are deterministic
    graph.Process._id_gen = 0
    fd, target = Path.tempfile(prefix='rpz_testgraph_', suffix='.dot')
    os.close(fd)
    try:
        graph.generate(target,
                       self._trace / 'config.yml',
                       self._trace / 'trace.sqlite3',
                       **kwargs)
        if expected is False:
            self.fail("DOT generation didn't fail as expected")
        with target.open('r') as fp:
            self.assertEqual(expected, fp.read())
    except UsageError:
        # Expected failure mode when expected is False; otherwise re-raise
        if expected is not False:
            raise
    finally:
        target.remove()
def do_json_test(self, expected, **kwargs):
    """Generates a JSON graph from the test trace and compares it.

    :param expected: Expected graph as a decoded JSON object, or False if
        generation is expected to fail (via SystemExit).
    :param kwargs: Extra options forwarded to graph.generate().
    """
    # Reset the process id counter so generated ids are deterministic
    graph.Process._id_gen = 0
    fd, target = Path.tempfile(prefix='rpz_testgraph_', suffix='.json')
    os.close(fd)
    try:
        graph.generate(target,
                       self._trace / 'config.yml',
                       self._trace / 'trace.sqlite3',
                       graph_format='json', **kwargs)
        if expected is False:
            self.fail("JSON generation didn't fail as expected")
        # Compare parsed objects, not raw text, so key order doesn't matter
        with target.open('r', encoding='utf-8') as fp:
            obj = json.load(fp)
        self.assertEqual(expected, obj)
    except SystemExit:
        # Expected failure mode when expected is False; otherwise re-raise
        if expected is not False:
            raise
    finally:
        target.remove()
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.

    Builds the pack file `target` from the trace and configuration found in
    `directory`: original trace, traced files (grouped by package or not),
    pack-format version and canonicalized configuration.

    :param target: Path of the pack file to create; must not exist yet.
    :param directory: Directory containing config.yml and trace.sqlite3.
    :param sort_packages: Whether to re-assign files to distribution
        packages during canonicalization.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, additional_patterns = load_config(
        configfile,
        canonical=False)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    runs, packages, other_files = canonicalize_config(
        runs, packages, other_files, additional_patterns, sort_packages)

    logging.info("Creating pack %s...", target)
    tar = PackBuilder(target)

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if trace.is_file():
        tar.add(trace, Path('METADATA/trace.sqlite3'))

    # Add the files from the packages
    for pkg in packages:
        if pkg.packfiles:
            logging.info("Adding files from package %s...", pkg.name)
            files = []
            for f in pkg.files:
                if not Path(f.path).exists():
                    logging.warning("Missing file %s from package %s",
                                    f.path, pkg.name)
                else:
                    tar.add_data(f.path)
                    files.append(f)
            # Keep only the files that were actually packed
            pkg.files = files
        else:
            logging.info("NOT adding files from package %s", pkg.name)

    # Add the rest of the files
    logging.info("Adding other files...")
    files = set()
    for f in other_files:
        if not Path(f.path).exists():
            logging.warning("Missing file %s", f.path)
        else:
            tar.add_data(f.path)
            files.add(f)
    other_files = files

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 1\n')
        tar.add(manifest, Path('METADATA/version'))
    finally:
        manifest.remove()

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        # Written config reflects the canonicalized (and pruned) file lists
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    canonical=True)
        tar.add(can_configfile, Path('METADATA/config.yml'))
    finally:
        can_configfile.remove()

    tar.close()
def combine_traces(traces, target):
    """Combines multiple trace databases into one.

    The runs from the original traces are appended ('run_id' field gets
    translated to avoid conflicts).

    :param traces: List of trace database filenames.
    :type traces: [Path]
    :param target: Directory where to write the new database and associated
        configuration file.
    :type target: Path
    """
    # We are probably overwriting on of the traces we're reading, so write to
    # a temporary file first then move it
    fd, output = Path.tempfile('.sqlite3', 'reprozip_combined_')
    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(output))
    else:
        conn = sqlite3.connect(output.path)
    os.close(fd)
    conn.row_factory = sqlite3.Row

    # Create the schema
    create_schema(conn)

    # Temporary database with lookup tables
    conn.execute(
        '''
        ATTACH DATABASE '' AS maps;
        ''')
    # AUTOINCREMENT assigns new ids sequentially in insertion order, which
    # is how old run/process ids get translated to fresh ones
    conn.execute(
        '''
        CREATE TABLE maps.map_runs(
            old INTEGER NOT NULL,
            new INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT
            );
        ''')
    conn.execute(
        '''
        CREATE TABLE maps.map_processes(
            old INTEGER NOT NULL,
            new INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT
            );
        ''')

    # Do the merge
    for other in traces:
        logging.info("Attaching database %s", other)

        # Attach the other trace
        conn.execute(
            '''
            ATTACH DATABASE ? AS trace;
            ''',
            (str(other),))

        # Add runs to lookup table
        conn.execute(
            '''
            INSERT INTO maps.map_runs(old)
            SELECT DISTINCT run_id AS old
            FROM trace.processes
            ORDER BY run_id;
            ''')

        logging.info(
            "%d rows in maps.map_runs",
            list(conn.execute('SELECT COUNT(*) FROM maps.map_runs;'))[0][0])

        # Add processes to lookup table
        conn.execute(
            '''
            INSERT INTO maps.map_processes(old)
            SELECT id AS old
            FROM trace.processes
            ORDER BY id;
            ''')

        logging.info(
            "%d rows in maps.map_processes",
            list(conn.execute('SELECT COUNT(*) FROM maps.map_processes;'))
            [0][0])

        # processes
        logging.info("Insert processes...")
        conn.execute(
            '''
            INSERT INTO processes(id, run_id, parent,
                                  timestamp, is_thread, exitcode)
            SELECT p.new AS id, r.new AS run_id, parent,
                   timestamp, is_thread, exitcode
            FROM trace.processes t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.id = p.old
            ORDER BY t.id;
            ''')

        # opened_files
        logging.info("Insert opened_files...")
        conn.execute(
            '''
            INSERT INTO opened_files(run_id, name, timestamp,
                                     mode, is_directory, process)
            SELECT r.new AS run_id, name, timestamp,
                   mode, is_directory, p.new AS process
            FROM trace.opened_files t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.process = p.old
            ORDER BY t.id;
            ''')

        # executed_files
        logging.info("Insert executed_files...")
        conn.execute(
            '''
            INSERT INTO executed_files(name, run_id, timestamp, process,
                                       argv, envp, workingdir)
            SELECT name, r.new AS run_id, timestamp, p.new AS process,
                   argv, envp, workingdir
            FROM trace.executed_files t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.process = p.old
            ORDER BY t.id;
            ''')

        # Flush maps so the next trace gets fresh translations
        conn.execute(
            '''
            DELETE FROM maps.map_runs;
            ''')
        conn.execute(
            '''
            DELETE FROM maps.map_processes;
            ''')

        # Detach
        conn.execute(
            '''
            DETACH DATABASE trace;
            ''')

    conn.execute(
        '''
        DETACH DATABASE maps;
        ''')

    conn.commit()
    conn.close()

    # Move database to final destination
    if not target.exists():
        target.mkdir()
    output.move(target / 'trace.sqlite3')
def combine_traces(traces, target):
    """Combines multiple trace databases into one.

    The runs from the original traces are appended ('run_id' field gets
    translated to avoid conflicts).

    :param traces: List of trace database filenames.
    :type traces: [Path]
    :param target: Directory where to write the new database and associated
        configuration file.
    :type target: Path
    """
    # We are probably overwriting on of the traces we're reading, so write to
    # a temporary file first then move it
    fd, output = Path.tempfile('.sqlite3', 'reprozip_combined_')
    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(output))
    else:
        conn = sqlite3.connect(output.path)
    os.close(fd)
    conn.row_factory = sqlite3.Row

    # Create the schema
    create_schema(conn)

    # Temporary database with lookup tables
    conn.execute('''
        ATTACH DATABASE '' AS maps;
        ''')
    # AUTOINCREMENT hands out the translated (new) ids in insertion order
    conn.execute('''
        CREATE TABLE maps.map_runs(
            old INTEGER NOT NULL,
            new INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT
            );
        ''')
    conn.execute('''
        CREATE TABLE maps.map_processes(
            old INTEGER NOT NULL,
            new INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT
            );
        ''')

    # Do the merge
    for other in traces:
        logging.info("Attaching database %s", other)

        # Attach the other trace
        conn.execute(
            '''
            ATTACH DATABASE ? AS trace;
            ''',
            (str(other), ))

        # Add runs to lookup table
        conn.execute('''
            INSERT INTO maps.map_runs(old)
            SELECT DISTINCT run_id AS old
            FROM trace.processes
            ORDER BY run_id;
            ''')

        logging.info(
            "%d rows in maps.map_runs",
            list(conn.execute('SELECT COUNT(*) FROM maps.map_runs;'))[0][0])

        # Add processes to lookup table
        conn.execute('''
            INSERT INTO maps.map_processes(old)
            SELECT id AS old
            FROM trace.processes
            ORDER BY id;
            ''')

        logging.info(
            "%d rows in maps.map_processes",
            list(conn.execute('SELECT COUNT(*) FROM maps.map_processes;'))[0]
            [0])

        # processes
        logging.info("Insert processes...")
        conn.execute('''
            INSERT INTO processes(id, run_id, parent,
                                  timestamp, is_thread, exitcode)
            SELECT p.new AS id, r.new AS run_id, parent,
                   timestamp, is_thread, exitcode
            FROM trace.processes t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.id = p.old
            ORDER BY t.id;
            ''')

        # opened_files
        logging.info("Insert opened_files...")
        conn.execute('''
            INSERT INTO opened_files(run_id, name, timestamp,
                                     mode, is_directory, process)
            SELECT r.new AS run_id, name, timestamp,
                   mode, is_directory, p.new AS process
            FROM trace.opened_files t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.process = p.old
            ORDER BY t.id;
            ''')

        # executed_files
        logging.info("Insert executed_files...")
        conn.execute('''
            INSERT INTO executed_files(name, run_id, timestamp, process,
                                       argv, envp, workingdir)
            SELECT name, r.new AS run_id, timestamp, p.new AS process,
                   argv, envp, workingdir
            FROM trace.executed_files t
            INNER JOIN maps.map_runs r ON t.run_id = r.old
            INNER JOIN maps.map_processes p ON t.process = p.old
            ORDER BY t.id;
            ''')

        # Flush maps so the next trace gets fresh translations
        conn.execute('''
            DELETE FROM maps.map_runs;
            ''')
        conn.execute('''
            DELETE FROM maps.map_processes;
            ''')

        # Detach
        conn.execute('''
            DETACH DATABASE trace;
            ''')

    conn.execute('''
        DETACH DATABASE maps;
        ''')

    conn.commit()
    conn.close()

    # Move database to final destination
    if not target.exists():
        target.mkdir()
    output.move(target / 'trace.sqlite3')
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.

    Builds a version-2 pack: the traced files go into a nested DATA.tar.gz
    member, and metadata (version, trace, canonical config) goes under
    METADATA/ in the outer uncompressed tar.

    :param target: Path of the pack file to create; must not exist yet.
    :param directory: Directory containing config.yml and trace.sqlite3.
    :param sort_packages: Whether to re-assign files to distribution
        packages during canonicalization.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(
        configfile, canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids: restricted character set, and not all-digits (which
    # would be ambiguous with run numbers)
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)",
                            run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(
        packages, other_files, additional_patterns, sort_packages)

    logger.info("Creating pack %s...", target)
    # Outer tar is uncompressed; the data inside is already compressed
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                # Keep only the files that were actually packed
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logger.warning("File is designated as input (name %s) but is not "
                           "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    inputs_outputs,
                    canonical=True,
                    pack_id=pack_id)
        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files,
                         inputs_outputs,
                         pack_id)
def run(self, files):
    """Uploads local files into the experiment, or lists input files.

    Each element of ``files`` is a "<local_path>:<input_name>" spec; an
    empty local_path means "restore the original packed file". With no
    arguments, prints the input files and their current assignment.
    """
    reprounzip.common.record_usage(upload_files=len(files))
    inputs_outputs = self.get_config().inputs_outputs

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for input_name in sorted(n for n, f in iteritems(inputs_outputs)
                                 if f.read_runs):
            assigned = self.input_files.get(input_name)
            # self.input_files values: None=untouched, False=removed,
            # True=regenerated, str/bytes=path of the uploaded replacement
            if assigned is None:
                assigned = "(original)"
            elif assigned is False:
                assigned = "(not created)"
            elif assigned is True:
                assigned = "(generated)"
            else:
                assert isinstance(assigned, (bytes, unicode_))
            print(" %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)
    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r", filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            if input_name.startswith('/'):
                # Absolute name: treat it as a raw path in the experiment
                input_path = PosixPath(input_name)
            else:
                try:
                    input_path = inputs_outputs[input_name].path
                except KeyError:
                    logging.critical("Invalid input file: %r", input_name)
                    sys.exit(1)

            temp = None

            if not local_path:
                # Restore original file from pack
                logging.debug("Restoring input file %s", input_path)
                fd, temp = Path.tempfile(prefix='reprozip_input_')
                os.close(fd)
                local_path = self.extract_original_input(
                    input_name, input_path, temp)
                if local_path is None:
                    # Nothing was packed for this input; skip it
                    temp.remove()
                    logging.warning(
                        "No original packed, can't restore "
                        "input file %s", input_name)
                    continue
            else:
                local_path = Path(local_path)

            logging.debug("Uploading file %s to %s", local_path, input_path)
            if not local_path.exists():
                logging.critical("Local file %s doesn't exist", local_path)
                sys.exit(1)
            self.upload_file(local_path, input_path)

            if temp is not None:
                # Restored the original: forget any previous assignment
                temp.remove()
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()
def run(self, files):
    """Uploads local files into the experiment, or lists input files.

    Each element of ``files`` is a "<local_path>:<input_name>" spec; an
    empty local_path means "restore the original packed file". With no
    arguments, prints the input files and their current assignment.
    """
    reprounzip.common.record_usage(upload_files=len(files))
    # Map input name -> path inside the experiment, for inputs only
    input_files = dict(
        (n, f.path)
        for n, f in iteritems(self.get_config().inputs_outputs)
        if f.read_runs)

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for input_name in input_files:
            if self.input_files.get(input_name) is not None:
                assigned = self.input_files[input_name]
            else:
                assigned = "(original)"
            print(" %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)
    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r", filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            try:
                input_path = input_files[input_name]
            except KeyError:
                logging.critical("Invalid input file: %r", input_name)
                sys.exit(1)

            temp = None

            if not local_path:
                # Restore original file from pack
                logging.debug("Restoring input file %s", input_path)
                fd, temp = Path.tempfile(prefix='reprozip_input_')
                os.close(fd)
                local_path = self.extract_original_input(input_name,
                                                         input_path, temp)
                if local_path is None:
                    # Nothing was packed for this input; skip it
                    temp.remove()
                    logging.warning("No original packed, can't restore "
                                    "input file %s", input_name)
                    continue
            else:
                local_path = Path(local_path)

            logging.debug("Uploading file %s to %s", local_path, input_path)
            if not local_path.exists():
                logging.critical("Local file %s doesn't exist", local_path)
                sys.exit(1)
            self.upload_file(local_path, input_path)

            if temp is not None:
                # Restored the original: forget any previous assignment
                temp.remove()
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()
def run(self, files):
    """Uploads local files into the experiment, or lists input files.

    Each element of ``files`` is a "<local_path>:<input_name>" spec; an
    empty local_path means "restore the original packed file". With no
    arguments, prints the input files and their current assignment.
    """
    reprounzip.common.record_usage(upload_files=len(files))
    runs = self.get_runs_from_config()

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print(" Run %d:" % i)
            for input_name in run['input_files']:
                if self.input_files.get(input_name) is not None:
                    assigned = PosixPath(self.input_files[input_name])
                else:
                    assigned = "(original)"
                print(" %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)

    # Get the path of each input file
    all_input_files = {}
    for run in runs:
        all_input_files.update(run['input_files'])

    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            try:
                input_path = PosixPath(all_input_files[input_name])
            except KeyError:
                logging.critical("Invalid input file: %r", input_name)
                sys.exit(1)

            temp = None

            if not local_path:
                # Restore original file from pack
                logging.debug("Restoring input file %s", input_path)
                fd, temp = Path.tempfile(prefix='reprozip_input_')
                os.close(fd)
                local_path = self.extract_original_input(
                    input_name, input_path, temp)
                # Fix: extract_original_input can return None when the
                # original was not packed (the sibling variants guard
                # this); without the guard, local_path.exists() below
                # would crash on None
                if local_path is None:
                    temp.remove()
                    logging.warning("No original packed, can't restore "
                                    "input file %s", input_name)
                    continue
            else:
                local_path = Path(local_path)

            logging.debug("Uploading file %s to %s", local_path, input_path)
            if not local_path.exists():
                logging.critical("Local file %s doesn't exist", local_path)
                sys.exit(1)
            self.upload_file(local_path, input_path)

            if temp is not None:
                # Restored the original: forget any previous assignment
                temp.remove()
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.

    Builds a version-2 pack: the traced files go into a nested DATA.tar.gz
    member, and metadata (version, trace, canonical config) goes under
    METADATA/ in the outer uncompressed tar.

    :param target: Path of the pack file to create; must not exist yet.
    :param directory: Directory containing config.yml and trace.sqlite3.
    :param sort_packages: Whether to re-assign files to distribution
        packages during canonicalization.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(configfile,
                                                       canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids: restricted character set, and not all-digits (which
    # would be ambiguous with run numbers)
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)",
                            run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(packages, other_files,
                                                additional_patterns,
                                                sort_packages)

    logger.info("Creating pack %s...", target)
    # Outer tar is uncompressed; the data inside is already compressed
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                # Keep only the files that were actually packed
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in inputs_outputs.items():
        if f.read_runs and not Path(f.path).exists():
            logger.warning(
                "File is designated as input (name %s) but is not "
                "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    inputs_outputs,
                    canonical=True,
                    pack_id=pack_id)
        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files,
                         inputs_outputs,
                         pack_id)
def run(self, files): runs = self.get_runs_from_config() # No argument: list all the input files and exit if not files: print("Input files:") for i, run in enumerate(runs): if len(runs) > 1: print(" Run %d:" % i) for input_name in run['input_files']: if self.input_files.get(input_name) is not None: assigned = PosixPath(self.input_files[input_name]) else: assigned = "(original)" print(" %s: %s" % (input_name, assigned)) return self.prepare_upload(files) # Get the path of each input file all_input_files = {} for run in runs: all_input_files.update(run['input_files']) try: # Upload files for filespec in files: filespec_split = filespec.rsplit(':', 1) if len(filespec_split) != 2: logging.critical("Invalid file specification: %r", filespec) sys.exit(1) local_path, input_name = filespec_split try: input_path = PosixPath(all_input_files[input_name]) except KeyError: logging.critical("Invalid input file: %r", input_name) sys.exit(1) temp = None if not local_path: # Restore original file from pack logging.debug("Restoring input file %s", input_path) fd, temp = Path.tempfile(prefix='reprozip_input_') os.close(fd) local_path = self.extract_original_input(input_name, input_path, temp) else: local_path = Path(local_path) logging.debug("Uploading file %s to %s", local_path, input_path) if not local_path.exists(): logging.critical("Local file %s doesn't exist", local_path) sys.exit(1) self.upload_file(local_path, input_path) if temp is not None: temp.remove() del self.input_files[input_name] else: self.input_files[input_name] = local_path.absolute().path finally: self.finalize()
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.

    Builds a version-1 pack at `target` from the trace and configuration
    found in `directory`, tagging it with a fresh pack_id for usage
    reporting.

    :param target: Path of the pack file to create; must not exist yet.
    :param directory: Directory containing config.yml and trace.sqlite3.
    :param sort_packages: Whether to re-assign files to distribution
        packages during canonicalization.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, additional_patterns = load_config(
        configfile,
        canonical=False)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    runs, packages, other_files = canonicalize_config(
        runs, packages, other_files, additional_patterns, sort_packages)

    logging.info("Creating pack %s...", target)
    tar = PackBuilder(target)

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if trace.is_file():
        tar.add(trace, Path('METADATA/trace.sqlite3'))

    # Add the files from the packages
    for pkg in packages:
        if pkg.packfiles:
            logging.info("Adding files from package %s...", pkg.name)
            files = []
            for f in pkg.files:
                if not Path(f.path).exists():
                    logging.warning("Missing file %s from package %s",
                                    f.path, pkg.name)
                else:
                    tar.add_data(f.path)
                    files.append(f)
            # Keep only the files that were actually packed
            pkg.files = files
        else:
            logging.info("NOT adding files from package %s", pkg.name)

    # Add the rest of the files
    logging.info("Adding other files...")
    files = set()
    for f in other_files:
        if not Path(f.path).exists():
            logging.warning("Missing file %s", f.path)
        else:
            tar.add_data(f.path)
            files.add(f)
    other_files = files

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 1\n')
        tar.add(manifest, Path('METADATA/version'))
    finally:
        manifest.remove()

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    canonical=True,
                    pack_id=pack_id)
        tar.add(can_configfile, Path('METADATA/config.yml'))
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files,
                         pack_id)