Esempio n. 1
0
def read_files(files, File=File):
    if files is None:
        return []
    return [File(PosixPath(f)) for f in files]
Esempio n. 2
0
def directory_run(args):
    """Runs the command in the directory.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' %
        ':'.join(shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ) if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [
            PosixPath(d) for d in run['environ'].get('PATH', '').split(':')
        ]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                        (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
Esempio n. 3
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.", f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [
            f.path for f in itervalues(config.inputs_outputs) if f.read_runs
        ]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Esempio n. 4
0
    def finalize(self):
        if not self.docker_copy:
            self.build_directory.rmtree()
            return

        from_image = self.unpacked_info['current_image']

        with self.build_directory.open('w',
                                       'Dockerfile',
                                       encoding='utf-8',
                                       newline='\n') as dockerfile:
            dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
            for src, target in self.docker_copy:
                # FIXME : spaces in filenames will probably break Docker
                dockerfile.write(
                    'COPY \\\n    %s \\\n    %s\n' %
                    (shell_escape(str(src)), shell_escape(str(target))))

            for src, target in self.docker_copy:
                uid = gid = None

                # Keep permissions if the file is already in there
                tar = tarfile.open(str(self.target / 'data.tgz'), 'r:*')
                try:
                    info = tar.getmember(
                        str(join_root(PosixPath(b'DATA'), target)))
                    uid, gid = info.uid, info.gid
                except KeyError:
                    pass

                # Otherwise default on the first run's UID/GID
                if uid is None:
                    uid, gid = self.default_ownership

                # Lastly, use 1000
                if uid is None:
                    uid = gid = 1000

                dockerfile.write('RUN ["/busybox", "chown", "%d:%d", %s]\n' % (
                    uid,
                    gid,
                    json.dumps(str(target)),
                ))

        image = make_unique_name(b'reprounzip_image_')
        retcode = subprocess.call(self.docker_cmd +
                                  ['build', '-t', image, '.'],
                                  cwd=self.build_directory.path)
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)
        else:
            logger.info("New image created: %s", image.decode('ascii'))
            if from_image != self.unpacked_info['initial_image']:
                logger.info("Untagging previous image %s",
                            from_image.decode('ascii'))
                retcode = subprocess.call(self.docker_cmd +
                                          ['rmi', from_image])
                if retcode != 0:
                    logger.warning(
                        "Can't remove previous image, docker "
                        "returned %d", retcode)
            self.unpacked_info['current_image'] = image
            write_dict(self.target, self.unpacked_info)

        self.build_directory.rmtree()
Esempio n. 5
0
    def __init__(self, enabled, target, display=None):
        self.enabled = enabled
        if not self.enabled:
            return

        self.target = target

        self.xauth = PosixPath('/.reprounzip_xauthority')
        self.display = (int(display)
                        if display is not None else self.DISPLAY_NUMBER)
        logger.debug(
            "X11 support enabled; will create Xauthority file %s "
            "for experiment. Display number is %d", self.xauth, self.display)

        # List of addresses that match the $DISPLAY variable
        possible, local_display = self._locate_display()
        tcp_portnum = ((6000 +
                        local_display) if local_display is not None else None)

        if ('XAUTHORITY' in os.environ
                and Path(os.environ['XAUTHORITY']).is_file()):
            xauthority = Path(os.environ['XAUTHORITY'])
        # Note: I'm assuming here that Xauthority has no XDG support
        else:
            xauthority = Path('~').expand_user() / '.Xauthority'

        # Read Xauthority file
        xauth_entries = {}
        if xauthority.is_file():
            with xauthority.open('rb') as fp:
                fp.seek(0, os.SEEK_END)
                size = fp.tell()
                fp.seek(0, os.SEEK_SET)
                while fp.tell() < size:
                    entry = Xauth.from_file(fp)
                    if (entry.name == 'MIT-MAGIC-COOKIE-1'
                            and entry.number == local_display):
                        if entry.family == Xauth.FAMILY_LOCAL:
                            xauth_entries[(entry.family, None)] = entry
                        elif (entry.family == Xauth.FAMILY_INTERNET
                              or entry.family == Xauth.FAMILY_INTERNET6):
                            xauth_entries[(entry.family,
                                           entry.address)] = entry
        # FIXME: this completely ignores addresses

        logger.debug("Possible X endpoints: %s", (possible, ))

        # Select socket and authentication cookie
        self.xauth_record = None
        self.connection_info = None
        for family, address in possible:
            # Checks that we have a cookie
            entry = family, (None if family is Xauth.FAMILY_LOCAL else address)
            if entry not in xauth_entries:
                continue
            if family == Xauth.FAMILY_LOCAL and hasattr(socket, 'AF_UNIX'):
                # Checks that the socket exists
                if not Path(address).exists():
                    continue
                self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM,
                                        address)
                self.xauth_record = xauth_entries[(family, None)]
                logger.debug(
                    "Will connect to local X display via UNIX "
                    "socket %s", address)
                break
            else:
                # Checks that we have a cookie
                family = self.X2SOCK[family]
                self.connection_info = (family, socket.SOCK_STREAM,
                                        (address, tcp_portnum))
                self.xauth_record = xauth_entries[(family, address)]
                logger.debug("Will connect to X display %s:%d via %s/TCP",
                             address, tcp_portnum,
                             "IPv6" if family == socket.AF_INET6 else "IPv4")
                break

        # Didn't find an Xauthority record -- assume no authentication is
        # needed, but still set self.connection_info
        if self.connection_info is None:
            for family, address in possible:
                # Only try UNIX sockets, we'll use 127.0.0.1 otherwise
                if family == Xauth.FAMILY_LOCAL:
                    if not hasattr(socket, 'AF_UNIX'):
                        continue
                    self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM,
                                            address)
                    logger.debug(
                        "Will connect to X display via UNIX socket "
                        "%s, no authentication", address)
                    break
            else:
                self.connection_info = (socket.AF_INET, socket.SOCK_STREAM,
                                        ('127.0.0.1', tcp_portnum))
                logger.debug(
                    "Will connect to X display 127.0.0.1:%d via "
                    "IPv4/TCP, no authentication", tcp_portnum)

        if self.connection_info is None:
            raise RuntimeError("Couldn't determine how to connect to local X "
                               "server, DISPLAY is %s" %
                               (repr(os.environ['DISPLAY'])
                                if 'DISPLAY' in os.environ else 'not set'))
Esempio n. 6
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s", len(packages), e)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n' 'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (shell_escape(
                            unicode_(f)), shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch), target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(
            target,
            metadata_initial_iofiles(config, {
                'use_chroot': use_chroot,
                'gui': args.gui
            }))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Esempio n. 7
0
def directory_run(args):
    """Runs the command in the directory.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' %
        ':'.join(shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']) if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [
            PosixPath(d) for d in run['environ'].get('PATH', '').split(':')
        ]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Esempio n. 8
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        target_distribution = None
        base_image = args.base_image[0]
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8', newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)
        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n')

        # Installs missing packages
        packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        packages += [Package('sudo', None, packfiles=False)]
        if packages:
            installer = select_installer(pack, runs, target_distribution)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        for f in other_files:
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
Esempio n. 9
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        runs, packages, other_files = load_config_file(pack / 'config.yml',
                                                       canonical=True)
        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        input_files = unpacked_info.get('input_files', {})

        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for input_name, path in iteritems(run['input_files']):
                if args.verbosity >= 2:
                    print("    %s (%s)" % (input_name, path))
                else:
                    print("    %s" % input_name)
                if input_files.get(input_name) is not None:
                    assigned = PosixPath(input_files[input_name])
                else:
                    assigned = "(original)"
                print("      %s" % assigned)

        print("Output files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for output_name, path in iteritems(run['output_files']):
                if args.verbosity >= 2:
                    print("    %s (%s)" % (output_name, path))
                else:
                    print("    %s" % output_name)

    else:  # pack.is_file()
        # Reads info from a pack file
        runs, packages, other_files = load_config(pack)

        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for input_name, path in iteritems(run['input_files']):
                if args.verbosity >= 2:
                    print("    %s (%s)" % (input_name, path))
                else:
                    print("    %s" % input_name)

        print("Output files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for output_name, path in iteritems(run['output_files']):
                if args.verbosity >= 2:
                    print("    %s (%s)" % (output_name, path))
                else:
                    print("    %s" % output_name)
Esempio n. 10
0
File: misc.py Progetto: lcw/reprozip
    def run(self, files):
        reprounzip.common.record_usage(upload_files=len(files))
        inputs_outputs = self.get_config().inputs_outputs

        # No argument: list all the input files and exit
        if not files:
            print("Input files:")
            for input_name in sorted(n for n, f in inputs_outputs.items()
                                     if f.read_runs):
                assigned = self.input_files.get(input_name)
                if assigned is None:
                    assigned = "(original)"
                elif assigned is False:
                    assigned = "(not created)"
                elif assigned is True:
                    assigned = "(generated)"
                else:
                    assert isinstance(assigned, (bytes, str))
                print("    %s: %s" % (input_name, assigned))
            return

        self.prepare_upload(files)

        try:
            # Upload files
            for filespec in files:
                filespec_split = filespec.rsplit(':', 1)
                if len(filespec_split) != 2:
                    logger.critical("Invalid file specification: %r",
                                    filespec)
                    sys.exit(1)
                local_path, input_name = filespec_split

                if input_name.startswith('/'):
                    input_path = PosixPath(input_name)
                else:
                    try:
                        input_path = inputs_outputs[input_name].path
                    except KeyError:
                        logger.critical("Invalid input file: %r", input_name)
                        sys.exit(1)

                temp = None

                if not local_path:
                    # Restore original file from pack
                    logger.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(input_name,
                                                             input_path,
                                                             temp)
                    if local_path is None:
                        temp.remove()
                        logger.warning("No original packed, can't restore "
                                       "input file %s", input_name)
                        continue
                else:
                    local_path = Path(local_path)
                    logger.debug("Uploading file %s to %s",
                                 local_path, input_path)
                    if not local_path.exists():
                        logger.critical("Local file %s doesn't exist",
                                        local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)

                if temp is not None:
                    temp.remove()
                    self.input_files.pop(input_name, None)
                else:
                    self.input_files[input_name] = local_path.absolute().path
        finally:
            self.finalize()
Esempio n. 11
0
File: misc.py Progetto: lcw/reprozip
    def run(self, files, all_):
        reprounzip.common.record_usage(download_files=len(files))
        inputs_outputs = self.get_config().inputs_outputs

        # No argument: list all the output files and exit
        if not (all_ or files):
            print("Output files:")
            for output_name in sorted(n for n, f in inputs_outputs.items()
                                      if f.write_runs):
                print("    %s" % output_name)
            return

        # Parse the name[:path] syntax
        resolved_files = []
        all_files = set(n for n, f in inputs_outputs.items()
                        if f.write_runs)
        for filespec in files:
            filespec_split = filespec.split(':', 1)
            if len(filespec_split) == 1:
                output_name = local_path = filespec
            elif len(filespec_split) == 2:
                output_name, local_path = filespec_split
            else:
                logger.critical("Invalid file specification: %r",
                                filespec)
                sys.exit(1)
            local_path = Path(local_path) if local_path else None
            all_files.discard(output_name)
            resolved_files.append((output_name, local_path))

        # If all_ is set, add all the files that weren't explicitely named
        if all_:
            for output_name in all_files:
                resolved_files.append((output_name, Path(output_name)))

        self.prepare_download(resolved_files)

        success = True
        try:
            # Download files
            for output_name, local_path in resolved_files:
                if output_name.startswith('/'):
                    remote_path = PosixPath(output_name)
                else:
                    try:
                        remote_path = inputs_outputs[output_name].path
                    except KeyError:
                        logger.critical("Invalid output file: %r",
                                        output_name)
                        sys.exit(1)

                logger.debug("Downloading file %s", remote_path)
                if local_path is None:
                    ret = self.download_and_print(remote_path)
                else:
                    ret = self.download(remote_path, local_path)
                if ret is None:
                    ret = True
                    warnings.warn("download() returned None instead of "
                                  "True/False, assuming True",
                                  category=DeprecationWarning)
                if not ret:
                    success = False
            if not success:
                sys.exit(1)
        finally:
            self.finalize()
Esempio n. 12
0
 def test_lies_under(self):
     """ Tests the lies_under method."""
     self.assertTrue(PosixPath(b'/tmp')
                     .lies_under(b'/'))
     self.assertFalse(PosixPath(b'/tmp')
                      .lies_under(b'/var'))
     self.assertTrue(PosixPath(b'/tmp/some/file/here')
                     .lies_under(b'/tmp/some'))
     self.assertFalse(PosixPath(b'/tmp/some/file/here')
                      .lies_under(b'/tmp/no'))
     self.assertFalse(PosixPath(b'/tmp/some/file/here')
                      .lies_under(b'/no/tmp/some'))
     self.assertFalse(PosixPath(b'/tmp/some/file/here')
                      .lies_under(b'/no/some'))
     self.assertTrue(PosixPath(b'/tmp/some/file/here')
                     .lies_under(b'/tmp/some/file/here'))
     self.assertTrue(PosixPath(b'/')
                     .lies_under(b'/'))
     self.assertTrue(PosixPath(b'')
                     .lies_under(b''))
     self.assertTrue(PosixPath(b'test')
                     .lies_under(b''))
     self.assertFalse(PosixPath(b'')
                      .lies_under(b'test'))
     self.assertFalse(PosixPath(b'test')
                      .lies_under(b'/'))
Esempio n. 13
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            f = Path(f.path)
            if not f.exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.", f, pkg.name)
                missing_files = True
    if missing_files:
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    # Makes symlink targets relative
    for m in members:
        if not m.issym():
            continue
        linkname = PosixPath(m.linkname)
        if linkname.is_absolute:
            m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
Esempio n. 14
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        logging.warning(
            "According to configuration, some files were left out "
            "because they belong to the following packages:%s"
            "\nWill copy files from HOST SYSTEM",
            ''.join('\n    %s' % pkg for pkg in packages_not_packed))
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it", f, pkg.name)
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']), busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
Esempio n. 15
0
def load_iofiles(config, runs):
    """Loads the inputs_outputs part of the configuration.

    This tests for duplicates, merge the lists of executions, and optionally
    loads from the runs for reprozip < 0.7 compatibility.
    """
    files_list = config.get('inputs_outputs') or []

    # reprozip < 0.7 compatibility: read input_files and output_files from runs
    if 'inputs_outputs' not in config:
        for i, run in enumerate(runs):
            for rkey, wkey in (('input_files', 'read_by_runs'),
                               ('output_files', 'written_by_runs')):
                for k, p in run.pop(rkey, {}).items():
                    files_list.append({'name': k, 'path': p, wkey: [i]})

    files = {}  # name:str: InputOutputFile
    paths = {}  # path:PosixPath: name:str
    required_keys = {'name', 'path'}
    optional_keys = {'read_by_runs', 'written_by_runs'}
    uniquenames = UniqueNames()
    for i, f in enumerate(files_list):
        keys = set(f)
        if (not keys.issubset(required_keys | optional_keys)
                or not keys.issuperset(required_keys)):
            raise InvalidConfig("File #%d has invalid keys")
        name = f['name']
        if name.startswith('/'):
            logger.warning(
                "File name looks like a path: %s, prefixing with "
                ".", name)
            name = '.%s' % name
        path = PosixPath(f['path'])
        readers = sorted(f.get('read_by_runs', []))
        writers = sorted(f.get('written_by_runs', []))
        if (not isinstance(readers, (tuple, list))
                or not all(isinstance(e, int) for e in readers)):
            raise InvalidConfig("read_by_runs should be a list of integers")
        if (not isinstance(writers, (tuple, list))
                or not all(isinstance(e, int) for e in writers)):
            raise InvalidConfig("written_by_runs should be a list of integers")
        if name in files:
            if files[name].path != path:
                old_name, name = name, uniquenames(name)
                logger.warning(
                    "File name appears multiple times: %s\n"
                    "Using name %s instead", old_name, name)
        else:
            uniquenames.insert(name)
        if path in paths:
            if paths[path] == name:
                logger.warning("File appears multiple times: %s", name)
            else:
                logger.warning(
                    "Two files have the same path (but different "
                    "names): %s, %s\nUsing name %s", name, paths[path],
                    paths[path])
                name = paths[path]
            files[name].read_runs.update(readers)
            files[name].write_runs.update(writers)
        else:
            paths[path] = name
            files[name] = InputOutputFile(path, readers, writers)

    return files
Esempio n. 16
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch), target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch), target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n' '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s", len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or f.path.lies_under('/run')
                        or f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Esempio n. 17
0
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute('''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute('''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute('''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id, parent, r_timestamp, False, binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(
                    process.pid,
                    process,
                    r_timestamp,
                    True,  # Hides exec only once
                    r_name,
                    C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' %
                     (id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' %
                         (id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write(
                    '    "%s" -> prog%d [color=blue, label="%s"];\n' %
                    (escape(unicode_(f)), id(prog), escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' %
                         (id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' %
                         (escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Esempio n. 18
0
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_includes=None,
             regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logger.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    if target.exists():
        logger.critical("Output file %s exists", target)
        sys.exit(1)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = config.inputs_outputs
    inputs_outputs_map = dict((f.path, n)
                              for n, f in config.inputs_outputs.items())
    has_thread_flag = config.format_version >= LooseVersion('0.7')
    has_exit_timestamp = config.format_version >= LooseVersion('1.1')

    runs, files, edges = read_events(database, all_forks,
                                     has_thread_flag,
                                     has_exit_timestamp)

    # Label the runs
    if len(runs) != len(config.runs):
        logger.warning("Configuration file doesn't list the same number of "
                       "runs we found in the database!")
    else:
        for config_run, run in zip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    include = [lambda path, r=re.compile(p): r.search(path) is not None
               for p in regex_includes or []]
    replace = [lambda path, r=re.compile(p): r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        pathuni = str(path)
        if include and not any(f(pathuni) for f in include):
            logger.debug("IGN(include) %s", pathuni)
            return None
        if any(f(pathuni) for f in ignore):
            logger.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logger.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logger.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logger.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version,
                                      pkg.meta.get('section', None))
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(packages.values(), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f
                              for f in other_files if f in inputs_outputs_map)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, inputs_outputs_map,
            level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Esempio n. 19
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch), target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch), target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n' '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s", len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or f.path.lies_under('/run')
                        or f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w',
                                                 encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write('#!/bin/sh\n'
                     '\n'
                     'COMMAND=\n'
                     'ENVVARS=\n'
                     '\n'
                     'if [ $# = 0 ]; then\n'
                     '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write('            exit 0\n'
                     '        ;;\n'
                     '        do)\n'
                     '            shift\n'
                     '            $1\n'
                     '        ;;\n'
                     '        env)\n'
                     '            shift\n'
                     '            ENVVARS="$1"\n'
                     '        ;;\n'
                     '        cmd)\n'
                     '            shift\n'
                     '            COMMAND="$1"\n'
                     '        ;;\n'
                     '        run)\n'
                     '            shift\n'
                     '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write('                {name})\n'
                         '                    RUNCOMMAND={cmd}\n'
                         '                    RUNWD={wd}\n'
                         '                    RUNENV={env}\n'
                         '                    RUNUID={uid}\n'
                         '                    RUNGID={gid}\n'
                         '                ;;\n'.format(
                             name='%s|%d' % (run['id'], i),
                             cmd=shell_escape(cmdline),
                             wd=shell_escape(run['workingdir']),
                             env=shell_escape(' '.join(
                                 '%s=%s' % (shell_escape(k), shell_escape(v))
                                 for k, v in iteritems(run['environ']))),
                             uid=run.get('uid', 1000),
                             gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Esempio n. 20
0
    def run(self, files):
        runs = self.get_runs_from_config()

        # No argument: list all the input files and exit
        if not files:
            print("Input files:")
            for i, run in enumerate(runs):
                if len(runs) > 1:
                    print("  Run %d:" % i)
                for input_name in run['input_files']:
                    if self.input_files.get(input_name) is not None:
                        assigned = PosixPath(self.input_files[input_name])
                    else:
                        assigned = "(original)"
                    print("    %s: %s" % (input_name, assigned))
            return

        self.prepare_upload(files)

        # Get the path of each input file
        all_input_files = {}
        for run in runs:
            all_input_files.update(run['input_files'])

        try:
            # Upload files
            for filespec in files:
                filespec_split = filespec.rsplit(':', 1)
                if len(filespec_split) != 2:
                    logging.critical("Invalid file specification: %r",
                                     filespec)
                    sys.exit(1)
                local_path, input_name = filespec_split

                try:
                    input_path = PosixPath(all_input_files[input_name])
                except KeyError:
                    logging.critical("Invalid input file: %r", input_name)
                    sys.exit(1)

                temp = None

                if not local_path:
                    # Restore original file from pack
                    logging.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(input_name,
                                                             input_path,
                                                             temp)
                else:
                    local_path = Path(local_path)
                    logging.debug("Uploading file %s to %s",
                                  local_path, input_path)
                    if not local_path.exists():
                        logging.critical("Local file %s doesn't exist",
                                         local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)

                if temp is not None:
                    temp.remove()
                    del self.input_files[input_name]
                else:
                    self.input_files[input_name] = local_path.absolute().path
        finally:
            self.finalize()