Example #1
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(
        configfile,
        canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
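    # A valid id may only use characters from run_chars and must not consist
    # solely of digits (presumably so an id cannot be mistaken for a run
    # number)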
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)",
                            run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(
        packages, other_files, additional_patterns, sort_packages)

    logger.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')
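    # The .rpz target is an uncompressed outer tar: the traced files are first
    # written to a temporary archive via PackBuilder (added below as
    # 'DATA.tar.gz'), and the METADATA/ entries are added alongside it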

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logger.warning("File is designated as input (name %s) but is not "
                           "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    inputs_outputs, canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files,
                         inputs_outputs,
                         pack_id)
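A minimal usage sketch for the pack() above; the import path and the trace
directory name are assumptions (the real CLI wires these arguments up from
argparse), and rpaths.Path is the path type used throughout these examples:

from rpaths import Path

from reprozip.pack import pack  # assumed location of the pack() shown above

# Pack the trace found in ./.reprozip-trace (assumed default --dir) into an
# .rpz archive, grouping the traced files by distribution package.
pack(target=Path('experiment.rpz'),
     directory=Path('.reprozip-trace'),
     sort_packages=True)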
Example #2
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, additional_patterns = load_config(
        configfile, canonical=False)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    runs, packages, other_files = canonicalize_config(runs, packages,
                                                      other_files,
                                                      additional_patterns,
                                                      sort_packages)

    logging.info("Creating pack %s...", target)
    tar = PackBuilder(target)

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if trace.is_file():
        tar.add(trace, Path('METADATA/trace.sqlite3'))

    # Add the files from the packages
    for pkg in packages:
        if pkg.packfiles:
            logging.info("Adding files from package %s...", pkg.name)
            files = []
            for f in pkg.files:
                if not Path(f.path).exists():
                    logging.warning("Missing file %s from package %s", f.path,
                                    pkg.name)
                else:
                    tar.add_data(f.path)
                    files.append(f)
            pkg.files = files
        else:
            logging.info("NOT adding files from package %s", pkg.name)

    # Add the rest of the files
    logging.info("Adding other files...")
    files = set()
    for f in other_files:
        if not Path(f.path).exists():
            logging.warning("Missing file %s", f.path)
        else:
            tar.add_data(f.path)
            files.add(f)
    other_files = files

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 1\n')
        tar.add(manifest, Path('METADATA/version'))
    finally:
        manifest.remove()

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile,
                    runs,
                    packages,
                    other_files,
                    reprozip_version,
                    canonical=True)

        tar.add(can_configfile, Path('METADATA/config.yml'))
    finally:
        can_configfile.remove()

    tar.close()
Example #3
def write_configuration(directory,
                        sort_packages,
                        find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    assert database.is_file()
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = [distro.id(), distro.version()]
    cur = conn.cursor()
    if overwrite or not config.exists():
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute('''
            SELECT e.name, e.argv, e.envp, e.workingdir,
                   p.timestamp, p.exit_timestamp, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles = load_config(config,
                                              canonical=False,
                                              File=TracedFile)

        # Same query as the previous block, but skips the first len(runs)
        # top-level processes, i.e. those already recorded in the loaded config
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir,
                   p.timestamp, p.exit_timestamp, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id
            LIMIT 2147483647 OFFSET ?;
            ''', (len(runs), ))
    for (r_name, r_argv, r_envp, r_workingdir, r_start, r_end,
         r_exitcode) in executions:
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        run = {
            'id': "run%d" % len(runs),
            'binary': r_name,
            'argv': argv,
            'workingdir': str(Path(r_workingdir)),
            'architecture': platform.machine().lower(),
            'distribution': distribution,
            'hostname': platform.node(),
            'system': [platform.system(),
                       platform.release()],
            'environ': environ,
            'uid': os.getuid(),
            'gid': os.getgid()
        }

        if r_exitcode & 0x0100:
            run['signal'] = r_exitcode & 0xFF
        else:
            run['exitcode'] = r_exitcode & 0xFF

        if r_end is not None:
            run['walltime'] = (r_end - r_start) / 1.0E9  # ns to s

        runs.append(run)

    cur.close()
    conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
Example #4
File: pack.py Project: lcw/reprozip
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(configfile,
                                                       canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id'])
                or all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)", run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(packages, other_files,
                                                additional_patterns,
                                                sort_packages)

    logger.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in inputs_outputs.items():
        if f.read_runs and not Path(f.path).exists():
            logger.warning(
                "File is designated as input (name %s) but is not "
                "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile,
                    runs,
                    packages,
                    other_files,
                    reprozip_version,
                    inputs_outputs,
                    canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files, inputs_outputs, pack_id)
Example #5
def write_configuration(directory, sort_packages, find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Writes configuration file
    config = directory / 'config.yml'
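    # Note: platform.linux_distribution() was deprecated in Python 3.5 and
    # removed in 3.8; the newer variant in Example #3 uses the distro package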
    distribution = platform.linux_distribution()[0:2]
    cur = conn.cursor()
    if overwrite or not config.exists():
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles = load_config(config,
                                              canonical=False,
                                              File=TracedFile)

        # Same query as previous block but only gets last process
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles,
                                      oldpkgs)
    for r_name, r_argv, r_envp, r_workingdir, r_exitcode in executions:
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': unicode_(Path(r_workingdir)),
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
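                     # The 0x0100 flag in the exit status marks termination by
                     # a signal, so the key is 'signal' or 'exitcode' and the
                     # value is the low byte either way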
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF})

    cur.close()
    conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
Example #6
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, additional_patterns = load_config(
            configfile,
            canonical=False)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    runs, packages, other_files = canonicalize_config(
            runs, packages, other_files, additional_patterns, sort_packages)

    logging.info("Creating pack %s...", target)
    tar = PackBuilder(target)

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if trace.is_file():
        tar.add(trace, Path('METADATA/trace.sqlite3'))

    # Add the files from the packages
    for pkg in packages:
        if pkg.packfiles:
            logging.info("Adding files from package %s...", pkg.name)
            files = []
            for f in pkg.files:
                if not Path(f.path).exists():
                    logging.warning("Missing file %s from package %s",
                                    f.path, pkg.name)
                else:
                    tar.add_data(f.path)
                    files.append(f)
            pkg.files = files
        else:
            logging.info("NOT adding files from package %s", pkg.name)

    # Add the rest of the files
    logging.info("Adding other files...")
    files = set()
    for f in other_files:
        if not Path(f.path).exists():
            logging.warning("Missing file %s", f.path)
        else:
            tar.add_data(f.path)
            files.add(f)
    other_files = files

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 1\n')
        tar.add(manifest, Path('METADATA/version'))
    finally:
        manifest.remove()

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version, canonical=True,
                    pack_id=pack_id)

        tar.add(can_configfile, Path('METADATA/config.yml'))
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files, pack_id)
Example #7
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already are if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if oldconfig:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles,
                                      oldpkgs)
    else:
        runs = []
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id;
                ''')
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1
                                     for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1
                                      for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})
    cur.close()

    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
Example #8
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already are if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if not oldconfig:
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL;
                ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        # Same query as previous block but only gets last process
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles,
                                      oldpkgs)
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1
                                     for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1
                                      for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})
    cur.close()

    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")