Ejemplo n.º 1
0
def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the {in/out}put files to the unpacker metadata.

    :param config: The configuration as returned by `load_config()`, which will
    be used to list the input and output files and to determine which ones have
    been packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}

    path2iofile = {f.path: n for n, f in iteritems(config.inputs_outputs)}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))

    return dct
Ejemplo n.º 2
0
def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the input files to the unpacker metadata.

    :param config: The configuration as returned by `load_config()`, which will
    be used to list the input files and to determine which ones have been
    packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}

    path2iofile = {f.path: n
                   for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))

    return dct
Ejemplo n.º 3
0
    def install(cls, packages, assume_yes=False):
        options = []
        if assume_yes:
            options.append('-y')
        required_pkgs = set(pkg.name for pkg in packages)
        r = subprocess.call(['yum', 'install'] + options + list(required_pkgs))

        # Checks on packages
        pkgs_status = cls.get_packages_info(packages)
        for pkg, status in itervalues(pkgs_status):
            if status is not None:
                required_pkgs.discard(pkg.name)
        if required_pkgs:
            logging.error("Error: some packages could not be installed:%s",
                          ''.join("\n    %s" % pkg for pkg in required_pkgs))

        return r, pkgs_status
Ejemplo n.º 4
0
    def install(cls, packages, assume_yes=False):
        options = []
        if assume_yes:
            options.append('-y')
        required_pkgs = set(pkg.name for pkg in packages)
        r = subprocess.call(['yum', 'install'] + options + list(required_pkgs))

        # Checks on packages
        pkgs_status = cls.get_packages_info(packages)
        for pkg, status in itervalues(pkgs_status):
            if status is not None:
                required_pkgs.discard(pkg.name)
        if required_pkgs:
            logging.error("Error: some packages could not be installed:%s",
                          ''.join("\n    %s" % pkg for pkg in required_pkgs))

        return r, pkgs_status
Ejemplo n.º 5
0
def generate(target,
             configfile,
             database,
             all_forks=False,
             graph_format='dot',
             level_pkgs='file',
             level_processes='thread',
             level_other_files='all',
             regex_filters=None,
             regex_replaces=None,
             aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {
            'dot': FORMAT_DOT,
            'DOT': FORMAT_DOT,
            'json': FORMAT_JSON,
            'JSON': FORMAT_JSON
        }[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict(
        (f.path, n) for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [
        lambda path, r=re.compile(p): r.search(path) is not None
        for p in regex_filters or []
    ]
    replace = [
        lambda path, r=re.compile(p): r.sub(repl, path)
        for p, repl in regex_replaces or []
    ]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict(
            (f.path, pkg) for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(
            PosixPath(*f.components[:file_depth + 1]) for f in other_files)
        edges = OrderedSet((prog, f if f in package_map else PosixPath(
            *f.components[:file_depth + 1]), mode, argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Ejemplo n.º 6
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logger.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logger.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.", f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logger.error("Some packages are missing, you should probably install "
                     "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [
            f.path for f in itervalues(config.inputs_outputs) if f.read_runs
        ]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Ejemplo n.º 7
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning(
                "According to configuration, some files were left "
                "out because they belong to the following "
                "packages:%s\nWill copy files from HOST SYSTEM",
                ''.join('\n    %s' % pkg for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it", path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [
            f.path for f in itervalues(config.inputs_outputs) if f.read_runs
        ]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Ejemplo n.º 8
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Ejemplo n.º 9
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n    %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Ejemplo n.º 10
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    def parse_run(runs, s):
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs)
                    or (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs)
                    or (show_outputs and file_filter.run in fio.write_runs))

    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml', canonical=True)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})

        if show_inputs:
            shown = False
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if not shown:
                        print("Input files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)

                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print("      %s" % assigned)
            if not shown:
                print("Input files: none")

        if show_outputs:
            shown = False
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if not shown:
                        print("Output files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
            if not shown:
                print("Output files: none")

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        if any(f.read_runs for f in itervalues(config.inputs_outputs)):
            print("Input files:")
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)
        else:
            print("Input files: none")

        if any(f.write_runs for f in itervalues(config.inputs_outputs)):
            print("Output files:")
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
        else:
            print("Output files: none")
Ejemplo n.º 11
0
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks,
                                     has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    replace = [lambda path, r=re.compile(p): r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Ejemplo n.º 12
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    def parse_run(runs, s):
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs) or
                    (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs) or
                    (show_outputs and file_filter.run in fio.write_runs))

    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml',
                                  canonical=True)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})

        if show_inputs:
            shown = False
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if not shown:
                        print("Input files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)

                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print("      %s" % assigned)
            if not shown:
                print("Input files: none")

        if show_outputs:
            shown = False
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if not shown:
                        print("Output files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
            if not shown:
                print("Output files: none")

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        if any(f.read_runs for f in itervalues(config.inputs_outputs)):
            print("Input files:")
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)
        else:
            print("Input files: none")

        if any(f.write_runs for f in itervalues(config.inputs_outputs)):
            print("Output files:")
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
        else:
            print("Output files: none")
Ejemplo n.º 13
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        record_usage(chroot_missing_pkgs=True)
        logging.warning("According to configuration, some files were left out "
                        "because they belong to the following packages:%s"
                        "\nWill copy files from HOST SYSTEM",
                        ''.join('\n    %s' % pkg
                                for pkg in packages_not_packed))
        missing_files = False
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            f, pkg.name)
                    missing_files = True
                    continue
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)
        if missing_files:
            record_usage(chroot_mising_files=True)

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
Ejemplo n.º 14
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            f = Path(f.path)
            if not f.exists():
                logging.error(
                        "Missing file %s (from package %s that wasn't packed) "
                        "on host, experiment will probably miss it.",
                        f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error(
                "Some packages are missing, you should probably install "
                "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    # Makes symlink targets relative
    for m in members:
        if not m.issym():
            continue
        linkname = PosixPath(m.linkname)
        if linkname.is_absolute:
            m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
Ejemplo n.º 15
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            f = Path(f.path)
            if not f.exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.", f, pkg.name)
                missing_files = True
    if missing_files:
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    # Makes symlink targets relative
    for m in members:
        if not m.issym():
            continue
        linkname = PosixPath(m.linkname)
        if linkname.is_absolute:
            m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
Ejemplo n.º 16
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        logging.warning(
            "According to configuration, some files were left out "
            "because they belong to the following packages:%s"
            "\nWill copy files from HOST SYSTEM",
            ''.join('\n    %s' % pkg for pkg in packages_not_packed))
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it", f, pkg.name)
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']), busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)