def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there is more than one run
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there is more than one file argument
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
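
# --- Illustration only (not part of reprozip) --------------------------------
# A minimal sketch of the label format chosen by compile_inputs_outputs()
# above. UniqueNames is assumed to append a numeric suffix to repeated labels,
# matching the uniquification loops further down in this module; a plain
# counter dict stands in for it here.
def _sketch_label(run_nb, arg_nb, nb_runs, nb_file_args_in_run, seen_counts):
    """Build the 'arg<run>_<arg>'-style label for a command-line file."""
    parts = []
    if nb_runs > 1:              # run number only if there are several runs
        parts.append(run_nb)
    if nb_file_args_in_run > 1:  # argument number only if several file args
        parts.append(arg_nb)
    label = 'arg%s' % '_'.join('%s' % s for s in parts)
    n = seen_counts.get(label, 0) + 1
    seen_counts[label] = n
    return label if n == 1 else '%s_%d' % (label, n)


_seen = {}
# Single run with a single file argument: the bare label 'arg' is enough
assert _sketch_label(0, 2, 1, 1, _seen) == 'arg'
# Second of two runs, file at argument index 3: both numbers are included
assert _sketch_label(1, 3, 2, 2, _seen) == 'arg1_3'
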
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if oldconfig:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM executed_files e
            INNER JOIN processes p on p.id=e.id
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles, oldpkgs)
    else:
        runs = []
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM executed_files e
            INNER JOIN processes p on p.id=e.id
            WHERE p.parent ISNULL
            ORDER BY p.id;
            ''')
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1 for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1 for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})

    cur.close()
    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
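
# --- Illustration only (not part of reprozip) --------------------------------
# The exitcode column decoded above is assumed to pack the process status with
# bit 0x0100 flagging "terminated by a signal" and the low byte carrying the
# exit code or signal number, which is why a run gets either a 'signal' or an
# 'exitcode' key but never both.
def _sketch_decode_status(r_exitcode):
    """Return ('signal', n) or ('exitcode', n) for a packed status value."""
    key = 'signal' if r_exitcode & 0x0100 else 'exitcode'
    return key, r_exitcode & 0xFF


assert _sketch_decode_status(0) == ('exitcode', 0)          # clean exit
assert _sketch_decode_status(2) == ('exitcode', 2)          # exited with code 2
assert _sketch_decode_status(0x0100 | 9) == ('signal', 9)   # killed by SIGKILL
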
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if not oldconfig:
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the
        # first executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        # Same query as previous block but only gets last process
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles, oldpkgs)
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1 for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1 for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})

    cur.close()
    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
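
# --- Hypothetical usage sketch (not part of reprozip) -------------------------
# Trace an experiment first (e.g. `reprozip trace ./experiment.sh`), then point
# write_configuration() at the resulting trace directory. Path is assumed to be
# rpaths.Path, consistent with the .resolve()/.path usage above; the directory
# below is only an example location.
if __name__ == '__main__':
    from rpaths import Path

    trace_dir = Path('/tmp/reprozip-trace')  # example path, adjust as needed
    write_configuration(trace_dir, sort_packages=True, overwrite=False)
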