Beispiel #1
0
def should_restore_owner(param):
    """Decides whether the original owner/group of files gets restored.

    `param` is the user's request: True (restore), None (nothing requested)
    or anything else, treated as "don't restore".

    As root, restoring happens unless it was declined. Otherwise it never
    happens, and explicitly requesting it terminates the program with exit
    status 1. The decision is passed to record_usage() and then returned.
    """
    running_as_root = os.getuid() == 0
    unspecified = param is None

    if running_as_root:
        if unspecified:
            # No preference given: tell the user what the default is
            logging.info("Running as root, we will restore files' "
                         "owner/group")
        # Only None and True enable restoring; any other value declines it
        ret = unspecified or param is True
    elif param is True:
        # An explicit request that can't be honored is fatal
        logging.critical("Not running as root, cannot restore files' "
                         "owner/group as requested")
        sys.exit(1)
    else:
        if unspecified:
            # No preference given: mention what is being skipped
            logging.warning("Not running as root, won't restore files' "
                            "owner/group")
        # When it was declined explicitly, no warning is needed
        ret = False

    record_usage(restore_owner=ret)
    return ret
Beispiel #2
0
def should_restore_owner(param):
    """Decides whether the original owner/group of files gets restored.

    `param` is the user's request: True (restore), None (nothing requested)
    or anything else, treated as "don't restore".

    As root, restoring happens unless it was declined. Otherwise it never
    happens, and explicitly requesting it terminates the program with exit
    status 1. The decision is passed to record_usage() and then returned.
    """
    running_as_root = os.getuid() == 0
    unspecified = param is None

    if running_as_root:
        if unspecified:
            # No preference given: tell the user what the default is
            logger.info("Running as root, we will restore files' "
                        "owner/group")
        # Only None and True enable restoring; any other value declines it
        ret = unspecified or param is True
    elif param is True:
        # An explicit request that can't be honored is fatal
        logger.critical("Not running as root, cannot restore files' "
                        "owner/group as requested")
        sys.exit(1)
    else:
        if unspecified:
            # No preference given: mention what is being skipped
            logger.warning("Not running as root, won't restore files' "
                           "owner/group")
        # When it was declined explicitly, no warning is needed
        ret = False

    record_usage(restore_owner=ret)
    return ret
Beispiel #3
0
def should_mount_magic_dirs(param):
    """Decides whether /dev and /proc get mounted inside the chroot.

    `param` is the user's request: True (mount), None (nothing requested)
    or anything else, treated as "don't mount".

    As root, mounting happens unless it was declined. Otherwise it never
    happens, and explicitly requesting it terminates the program with exit
    status 1. The decision is passed to record_usage() and then returned.
    """
    running_as_root = os.getuid() == 0
    unspecified = param is None

    if running_as_root:
        if unspecified:
            # No preference given: tell the user what the default is
            logger.info("Running as root, will mount /dev and /proc")
        # Only None and True enable mounting; any other value declines it
        ret = unspecified or param is True
    elif param is True:
        # Mounting was explicitly requested but can't be done: fatal
        logger.critical("Not running as root, cannot mount /dev and "
                        "/proc")
        sys.exit(1)
    else:
        if unspecified:
            # No preference given: mention what is being skipped
            logger.warning("Not running as root, won't mount /dev and /proc")
        # When it was declined explicitly, no warning is needed
        ret = False

    record_usage(mount_magic_dirs=ret)
    return ret
Beispiel #4
0
def should_mount_magic_dirs(param):
    """Decides whether /dev and /proc get mounted inside the chroot.

    `param` is the user's request: True (mount), None (nothing requested)
    or anything else, treated as "don't mount".

    As root, mounting happens unless it was declined. Otherwise it never
    happens, and explicitly requesting it terminates the program with exit
    status 1. The decision is passed to record_usage() and then returned.
    """
    running_as_root = os.getuid() == 0
    unspecified = param is None

    if running_as_root:
        if unspecified:
            # No preference given: tell the user what the default is
            logging.info("Running as root, will mount /dev and /proc")
        # Only None and True enable mounting; any other value declines it
        ret = unspecified or param is True
    elif param is True:
        # Mounting was explicitly requested but can't be done: fatal
        logging.critical("Not running as root, cannot mount /dev and "
                         "/proc")
        sys.exit(1)
    else:
        if unspecified:
            # No preference given: mention what is being skipped
            logging.warning("Not running as root, won't mount /dev and /proc")
        # When it was declined explicitly, no warning is needed
        ret = False

    record_usage(mount_magic_dirs=ret)
    return ret
Beispiel #5
0
def select_box(runs):
    """Picks the Vagrant box matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Ubuntu 12.04 and 14.04 and Debian 7 have dedicated boxes;
    other Ubuntu versions use the 15.04 box, and other Debian versions or
    other distributions use the Debian 8 box, with a warning when this is a
    substitution.

    Returns a `(distribution, box_name)` pair. Terminates the program with
    exit status 1 if the architecture is neither i686 nor x86_64.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s' % (distribution, version, architecture)
    record_usage(vagrant_select_box=usage)

    if architecture not in ('i686', 'x86_64'):
        logging.critical("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    # From here on, anything that is not i686 gets the 64-bit box
    is_32bit = architecture == 'i686'

    if distribution == 'ubuntu':
        if version == '12.04':
            box = 'hashicorp/precise32' if is_32bit else 'hashicorp/precise64'
        elif version == '14.04':
            box = 'ubuntu/trusty32' if is_32bit else 'ubuntu/trusty64'
        else:
            # No dedicated box for this version: use 15.04
            if version != '15.04':
                logging.warning("using Ubuntu 15.04 'Vivid' instead of '%s'",
                                version)
            box = 'ubuntu/vivid32' if is_32bit else 'ubuntu/vivid64'
        return 'ubuntu', box

    # Everything that is not Ubuntu runs on Debian
    if distribution != 'debian':
        logging.warning("unsupported distribution %s, using Debian",
                        distribution)
        version = '8'

    if version == '7' or version.startswith(('7.', 'wheezy')):
        box = 'remram/debian-7-i386' if is_32bit else 'remram/debian-7-amd64'
    else:
        # No dedicated box for this version: use Debian 8
        if not (version == '8' or version.startswith(('8.', 'jessie'))):
            logging.warning("using Debian 8 'Jessie' instead of '%s'", version)
        box = 'remram/debian-8-i386' if is_32bit else 'remram/debian-8-amd64'
    return 'debian', box
Beispiel #6
0
def select_box(runs):
    """Picks the Vagrant box matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Ubuntu 12.04 and 14.04 and Debian 7 have dedicated boxes;
    other Ubuntu versions use the 15.04 box, and other Debian versions or
    other distributions use the Debian 8 box, with a warning when this is a
    substitution.

    Returns a `(distribution, box_name)` pair. Terminates the program with
    exit status 1 if the architecture is neither i686 nor x86_64.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s' % (distribution, version, architecture)
    record_usage(vagrant_select_box=usage)

    if architecture not in ('i686', 'x86_64'):
        logging.critical("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    # From here on, anything that is not i686 gets the 64-bit box
    is_32bit = architecture == 'i686'

    if distribution == 'ubuntu':
        if version == '12.04':
            box = 'hashicorp/precise32' if is_32bit else 'hashicorp/precise64'
        elif version == '14.04':
            box = 'ubuntu/trusty32' if is_32bit else 'ubuntu/trusty64'
        else:
            # No dedicated box for this version: use 15.04
            if version != '15.04':
                logging.warning("using Ubuntu 15.04 'Vivid' instead of '%s'",
                                version)
            box = 'ubuntu/vivid32' if is_32bit else 'ubuntu/vivid64'
        return 'ubuntu', box

    # Everything that is not Ubuntu runs on Debian
    if distribution != 'debian':
        logging.warning("unsupported distribution %s, using Debian",
                        distribution)
        version = '8'

    if version == '7' or version.startswith(('7.', 'wheezy')):
        box = 'remram/debian-7-i386' if is_32bit else 'remram/debian-7-amd64'
    else:
        # No dedicated box for this version: use Debian 8
        if not (version == '8' or version.startswith(('8.', 'jessie'))):
            logging.warning("using Debian 8 'Jessie' instead of '%s'", version)
        box = 'remram/debian-8-i386' if is_32bit else 'remram/debian-8-amd64'
    return 'debian', box
Beispiel #7
0
def select_box(runs, gui=False):
    """Picks the Vagrant box matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Candidates come from the 'vagrant_boxes_x' parameter when
    `gui` is true, from 'vagrant_boxes' otherwise; distribution names and
    versions in that parameter are matched as regular expressions.

    Returns a `(distribution, box_name)` pair. Terminates the program with
    exit status 1 if the architecture is neither i686 nor x86_64, or if no
    box provides the architecture.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s;gui=%s' % (distribution, version, architecture, gui)
    record_usage(vagrant_select_box=usage)

    if architecture not in ('i686', 'x86_64'):
        logging.critical("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    def box_for_version(family):
        # Looks inside one distribution entry; None if nothing fits
        have_version = version is not None
        if have_version:
            for candidate in family['versions']:
                if re.match(candidate['version'], version) is None:
                    continue
                name = candidate['architectures'].get(architecture)
                if name is not None:
                    return candidate['distribution'], name
        # No usable exact match: fall back on this distribution's default
        fallback = family['default']
        if have_version:
            logging.warning("Using %s instead of '%s'",
                            fallback['name'], version)
        name = fallback['architectures'].get(architecture)
        if name is None:
            return None
        return fallback['distribution'], name

    def box_for_distribution(parameter):
        # Tries each matching distribution entry, then the global default
        for family in parameter['boxes']:
            if re.match(family['name'], distribution) is None:
                continue
            found = box_for_version(family)
            if found is not None:
                return found
        fallback = parameter['default']
        logging.warning("Unsupported distribution '%s', using %s",
                        distribution, fallback['name'])
        name = fallback['architectures'].get(architecture)
        if not name:
            return None
        return fallback['distribution'], name

    parameter_name = 'vagrant_boxes_x' if gui else 'vagrant_boxes'
    result = box_for_distribution(get_parameter(parameter_name))
    if result is None:
        logging.critical("Error: couldn't find a base box for required "
                         "architecture")
        sys.exit(1)
    return result
Beispiel #8
0
def select_box(runs, gui=False):
    """Picks the Vagrant box matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Candidates come from the 'vagrant_boxes_x' parameter when
    `gui` is true, from 'vagrant_boxes' otherwise; distribution names and
    versions in that parameter are matched as regular expressions.

    Returns a `(distribution, box_name)` pair. Terminates the program with
    exit status 1 if the architecture is neither i686 nor x86_64, or if no
    box provides the architecture.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s;gui=%s' % (distribution, version, architecture, gui)
    record_usage(vagrant_select_box=usage)

    if architecture not in ('i686', 'x86_64'):
        logger.critical("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    def box_for_version(family):
        # Looks inside one distribution entry; None if nothing fits
        have_version = version is not None
        if have_version:
            for candidate in family['versions']:
                if re.match(candidate['version'], version) is None:
                    continue
                name = candidate['architectures'].get(architecture)
                if name is not None:
                    return candidate['distribution'], name
        # No usable exact match: fall back on this distribution's default
        fallback = family['default']
        if have_version:
            logger.warning("Using %s instead of '%s'",
                           fallback['name'], version)
        name = fallback['architectures'].get(architecture)
        if name is None:
            return None
        return fallback['distribution'], name

    def box_for_distribution(parameter):
        # Tries each matching distribution entry, then the global default
        for family in parameter['boxes']:
            if re.match(family['name'], distribution) is None:
                continue
            found = box_for_version(family)
            if found is not None:
                return found
        fallback = parameter['default']
        logger.warning("Unsupported distribution '%s', using %s",
                       distribution, fallback['name'])
        name = fallback['architectures'].get(architecture)
        if not name:
            return None
        return fallback['distribution'], name

    parameter_name = 'vagrant_boxes_x' if gui else 'vagrant_boxes'
    result = box_for_distribution(get_parameter(parameter_name))
    if result is None:
        logger.critical("Error: couldn't find a base box for required "
                        "architecture")
        sys.exit(1)
    return result
Beispiel #9
0
def select_image(runs):
    """Picks the Docker base image matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Ubuntu 12.04, 14.04 and 14.10 and Debian 6 and 7 have
    dedicated images; other Ubuntu versions use 15.04, and other Debian
    versions or other distributions use Debian 8, with a warning when this
    is a substitution.

    Returns a `(distribution, image_name)` pair. An i686 experiment is
    accepted but runs on x86_64; any other architecture terminates the
    program with exit status 1.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s' % (distribution, version, architecture)
    record_usage(docker_select_box=usage)

    if architecture == 'i686':
        logging.info("wanted architecture was i686, but we'll use x86_64 with "
                     "Docker")
    elif architecture != 'x86_64':
        logging.error("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    if distribution == 'ubuntu':
        ubuntu_images = (('12.04', 'ubuntu:12.04'),
                         ('14.04', 'ubuntu:14.04'),
                         ('14.10', 'ubuntu:14.10'))
        for known_version, image in ubuntu_images:
            if version == known_version:
                return 'ubuntu', image
        # No dedicated image for this version: use 15.04
        if version != '15.04':
            logging.warning("using Ubuntu 15.04 'Vivid' instead of '%s'",
                            version)
        return 'ubuntu', 'ubuntu:15.04'

    # Everything that is not Ubuntu runs on Debian
    if distribution != 'debian':
        logging.warning("unsupported distribution %s, using Debian",
                        distribution)
        version = '8'

    if version == '6' or version.startswith(('6.', 'squeeze')):
        return 'debian', 'debian:squeeze'
    if version == '7' or version.startswith(('7.', 'wheezy')):
        return 'debian', 'debian:wheezy'

    # No dedicated image for this version: use Debian 8
    if not (version == '8' or version.startswith(('8.', 'jessie'))):
        logging.warning("using Debian 8 'Jessie' instead of '%s'",
                        version)
    return 'debian', 'debian:jessie'
Beispiel #10
0
def installpkgs(args):
    """Installs the necessary packages on the current machine.

    Reads from `args`:
      * `pack[0]`: the pack whose configuration lists the packages
      * `missing`: only consider packages whose files were not packed
      * `summary`: print each package's status instead of installing
      * `assume_yes`: forwarded to the installer's install() method

    Terminates the program with exit status 1 if THIS_DISTRIBUTION is not
    set (reported as "Not running on Linux") or if no package installer can
    be selected, and with the installer's own status if that is non-zero.
    """
    if not THIS_DISTRIBUTION:
        logger.critical("Not running on Linux")
        sys.exit(1)

    pack = args.pack[0]
    missing = args.missing

    # Loads config
    runs, packages, other_files = load_config(pack)

    try:
        installer = select_installer(pack, runs)
    except CantFindInstaller as e:
        # Both branches below need the installer; carrying on would only
        # fail later on the unbound name, so stop here
        logger.error("Couldn't select a package installer: %s", e)
        sys.exit(1)

    if args.summary:
        # Print out a list of packages with their status
        if missing:
            print("Packages not present in pack:")
            packages = [pkg for pkg in packages if not pkg.packfiles]
        else:
            print("All packages:")
        pkgs = installer.get_packages_info(packages)
        for pkg in packages:
            print("    %s (required version: %s, status: %s)" %
                  (pkg.name, pkg.version, pkgs[pkg.name][1]))
    else:
        if missing:
            # With --missing, ignore packages whose files were packed
            packages = [pkg for pkg in packages if not pkg.packfiles]

        # Installs packages
        record_usage(installpkgs_installing=len(packages))
        r, pkgs = installer.install(packages, assume_yes=args.assume_yes)
        for pkg in packages:
            req = pkg.version
            real = pkgs[pkg.name][1]
            if real == PKG_NOT_INSTALLED:
                logger.warning("package %s was not installed", pkg.name)
            elif real != req:
                # Only worth a warning when the versions actually differ
                logger.warning(
                    "version %s of %s was installed, instead of "
                    "%s", real, pkg.name, req)
        if r != 0:
            logger.critical("Installer exited with %d", r)
            sys.exit(r)
Beispiel #11
0
def installpkgs(args):
    """Installs the necessary packages on the current machine.

    Reads from `args`:
      * `pack[0]`: the pack whose configuration lists the packages
      * `missing`: only consider packages whose files were not packed
      * `summary`: print each package's status instead of installing
      * `assume_yes`: forwarded to the installer's install() method

    Terminates the program with exit status 1 if THIS_DISTRIBUTION is not
    set (reported as "Not running on Linux") or if no package installer can
    be selected, and with the installer's own status if that is non-zero.
    """
    if not THIS_DISTRIBUTION:
        logging.critical("Not running on Linux")
        sys.exit(1)

    pack = args.pack[0]
    missing = args.missing

    # Loads config
    runs, packages, other_files = load_config(pack)

    try:
        installer = select_installer(pack, runs)
    except CantFindInstaller as e:
        # Both branches below need the installer; carrying on would only
        # fail later on the unbound name, so stop here
        logging.error("Couldn't select a package installer: %s", e)
        sys.exit(1)

    if args.summary:
        # Print out a list of packages with their status
        if missing:
            print("Packages not present in pack:")
            packages = [pkg for pkg in packages if not pkg.packfiles]
        else:
            print("All packages:")
        pkgs = installer.get_packages_info(packages)
        for pkg in packages:
            print("    %s (required version: %s, status: %s)" % (
                  pkg.name, pkg.version, pkgs[pkg.name][1]))
    else:
        if missing:
            # With --missing, ignore packages whose files were packed
            packages = [pkg for pkg in packages if not pkg.packfiles]

        # Installs packages
        record_usage(installpkgs_installing=len(packages))
        r, pkgs = installer.install(packages, assume_yes=args.assume_yes)
        for pkg in packages:
            req = pkg.version
            real = pkgs[pkg.name][1]
            if real == PKG_NOT_INSTALLED:
                logging.warning("package %s was not installed", pkg.name)
            elif real != req:
                # Only worth a warning when the versions actually differ
                logging.warning("version %s of %s was installed, instead of "
                                "%s", real, pkg.name, req)
        if r != 0:
            logging.critical("Installer exited with %d", r)
            sys.exit(r)
Beispiel #12
0
def select_image(runs):
    """Picks the Docker base image matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Candidates come from the 'docker_images' parameter, whose
    distribution names and versions are matched as regular expressions.

    Returns a `(distribution, image_name)` pair. An i686 experiment is
    accepted but runs on x86_64; any other architecture terminates the
    program with exit status 1.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s' % (distribution, version, architecture)
    record_usage(docker_select_box=usage)

    if architecture == 'i686':
        logging.info("Wanted architecture was i686, but we'll use x86_64 with "
                     "Docker")
    elif architecture != 'x86_64':
        logging.error("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    def image_for_version(family):
        # Looks inside one distribution entry for the requested version
        have_version = version is not None
        if have_version:
            for candidate in family['versions']:
                if re.match(candidate['version'], version) is not None:
                    return candidate['distribution'], candidate['image']
        # No exact match: fall back on this distribution's default
        fallback = family['default']
        if have_version:
            logging.warning("Using %s instead of '%s'",
                            fallback['name'], version)
        return fallback['distribution'], fallback['image']

    def image_for_distribution(parameter):
        # Tries each matching distribution entry, then the global default
        for family in parameter['images']:
            if re.match(family['name'], distribution) is None:
                continue
            found = image_for_version(family)
            if found is not None:
                return found
        fallback = parameter['default']
        logging.warning("Unsupported distribution '%s', using %s",
                        distribution, fallback['name'])
        return fallback['distribution'], fallback['image']

    return image_for_distribution(get_parameter('docker_images'))
Beispiel #13
0
def select_image(runs):
    """Picks the Docker base image matching the system of the first run.

    The distribution (a name/version pair) and architecture are read from
    `runs[0]`. Candidates come from the 'docker_images' parameter, whose
    distribution names and versions are matched as regular expressions.

    Returns a `(distribution, image_name)` pair. An i686 experiment is
    accepted but runs on x86_64; any other architecture terminates the
    program with exit status 1.
    """
    first_run = runs[0]
    distribution, version = first_run['distribution']
    distribution = distribution.lower()
    architecture = first_run['architecture']

    usage = '%s;%s;%s' % (distribution, version, architecture)
    record_usage(docker_select_box=usage)

    if architecture == 'i686':
        logger.info("Wanted architecture was i686, but we'll use x86_64 with "
                    "Docker")
    elif architecture != 'x86_64':
        logger.error("Error: unsupported architecture %s", architecture)
        sys.exit(1)

    def image_for_version(family):
        # Looks inside one distribution entry for the requested version
        have_version = version is not None
        if have_version:
            for candidate in family['versions']:
                if re.match(candidate['version'], version) is not None:
                    return candidate['distribution'], candidate['image']
        # No exact match: fall back on this distribution's default
        fallback = family['default']
        if have_version:
            logger.warning("Using %s instead of '%s'",
                           fallback['name'], version)
        return fallback['distribution'], fallback['image']

    def image_for_distribution(parameter):
        # Tries each matching distribution entry, then the global default
        for family in parameter['images']:
            if re.match(family['name'], distribution) is None:
                continue
            found = image_for_version(family)
            if found is not None:
                return found
        fallback = parameter['default']
        logger.warning("Unsupported distribution '%s', using %s",
                       distribution, fallback['name'])
        return fallback['distribution'], fallback['image']

    return image_for_distribution(get_parameter('docker_images'))
Beispiel #14
0
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.

    `target` is the experiment directory: its 'config.yml' is read, and the
    workflow is written to 'vistrails.vt' in it, as a zip of a temporary
    directory holding the filled-in VISTRAILS_TEMPLATE. The temporary
    directory is always removed.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        # NOTE(review): every run writes to this same path, so with several
        # runs only the last workflow is left -- confirm this is intended
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            with vtdir.open('w', 'vistrail',
                            encoding='utf-8', newline='\n') as fp:
                vistrail = VISTRAILS_TEMPLATE
                cmdline = ' '.join(shell_escape(arg)
                                   for arg in run['argv'])
                vistrail = vistrail.format(
                        date='2014-11-12 15:31:18',
                        unpacker=unpacker,
                        directory=escape_xml(str(target.absolute())),
                        cmdline=escape_xml(cmdline),
                        module_name=module_name,
                        run=i)
                fp.write(vistrail)

            with bundle.open('wb') as fp:
                # The context manager closes the archive even if adding a
                # file fails
                with zipfile.ZipFile(fp, 'w') as z:
                    # Paths are added relative to the temporary directory
                    with vtdir.in_dir():
                        for path in Path('.').recursedir():
                            z.write(str(path))
        finally:
            vtdir.rmtree()
Beispiel #15
0
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.

    `target` is the experiment directory: its 'config.yml' is read, and the
    workflow is written to 'vistrails.vt' in it, as a zip of a temporary
    directory holding the filled-in VISTRAILS_TEMPLATE. The temporary
    directory is always removed.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        # NOTE(review): every run writes to this same path, so with several
        # runs only the last workflow is left -- confirm this is intended
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            with vtdir.open('w', 'vistrail', encoding='utf-8',
                            newline='\n') as fp:
                vistrail = VISTRAILS_TEMPLATE
                cmdline = ' '.join(shell_escape(arg) for arg in run['argv'])
                vistrail = vistrail.format(date='2014-11-12 15:31:18',
                                           unpacker=unpacker,
                                           directory=escape_xml(
                                               str(target.absolute())),
                                           cmdline=escape_xml(cmdline),
                                           module_name=module_name,
                                           run=i)
                fp.write(vistrail)

            with bundle.open('wb') as fp:
                # The context manager closes the archive even if adding a
                # file fails
                with zipfile.ZipFile(fp, 'w') as z:
                    # Paths are added relative to the temporary directory
                    with vtdir.in_dir():
                        for path in Path('.').recursedir():
                            z.write(str(path))
        finally:
            vtdir.rmtree()
Beispiel #16
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Reads `pack`, `target`, `use_chroot`, `bind_magic_dirs`, `memory`,
    `base_image` and `distribution` from `args`, and writes 'setup.sh',
    'data.tgz', 'Vagrantfile' and the reprounzip metadata into the target
    directory (plus 'rpz-files.list' without a chroot, 'busybox' with one).

    Terminates the program with exit status 1 if the pack filename is
    missing, the target already exists, the memory size is not an integer,
    or packages need installing but no installer can be selected. If
    anything fails once the target directory has been created, it is
    removed again and the exception is re-raised.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            # If given several times, the last value wins
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        # The user picked the box; the distribution is only known if given
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # The setup script can't be written without an installer;
            # carrying on would only fail later on the unbound name
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None:
                fp.write('  config.vm.provider "virtualbox" do |v|\n'
                         '    v.memory = %d\n'
                         '  end\n' % memory)

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built experiment directory behind
        target.rmtree(ignore_errors=True)
        raise
Beispiel #17
0
def do_vistrails(target, pack=None, **kwargs):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.

    The workflow XML is first written to a ``vistrail`` file inside a
    temporary directory; that directory is then zipped into
    ``<target>/vistrails.vt``. The temporary directory is always removed.

    :param target: directory of the unpacked experiment; ``config.yml`` is
    read from it and ``vistrails.vt`` is written into it
    :param pack: not used by this function; accepted so it can be called with
    the same keyword arguments as the other setup handlers
    :param kwargs: ignored
    """
    record_usage(do_vistrails=True)

    config = load_config(target / 'config.yml', canonical=True)

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    ids = IdScope()
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            wf = Workflow(fp, ids)

            # Directory module, referring to this directory
            d = wf.add_module('%s:Directory' % rpz_id, rpz_version)
            wf.add_function(d, 'directory',
                            [(directory_sig, str(target.resolve()))])

            # Each run is chained to the previous module through the
            # 'experiment' port, starting from the Directory module
            connect_from = d

            for i, run in enumerate(config.runs):
                # Files read/written by this run become input/output ports
                inputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                if i in f.read_runs)
                outputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                 if i in f.write_runs)
                ports = itertools.chain((('input', p) for p in inputs),
                                        (('output', p) for p in outputs))

                # Run module
                r = wf.add_module('%s:Run' % rpz_id, rpz_version)
                wf.add_function(r, 'cmdline', [
                                (string_sig,
                                 ' '.join(shell_escape(arg)
                                          for arg in run['argv']))])
                wf.add_function(r, 'run_number', [(integer_sig, i)])

                # Port specs for input/output files
                for type_, name in ports:
                    wf.add_port_spec(r, name, type_, [file_pkg_mod])

                # Draw connection
                wf.connect(connect_from, experiment_sig, 'experiment',
                           r, experiment_sig, 'experiment')
                connect_from = r

            wf.close()

        with bundle.open('wb') as fp:
            # Context manager guarantees the archive is closed even if adding
            # one of the files fails
            with zipfile.ZipFile(fp, 'w') as z:
                # Archive names are relative to the temporary directory
                with vtdir.in_dir():
                    for path in Path('.').recursedir():
                        z.write(str(path))
    finally:
        vtdir.rmtree()
Beispiel #18
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with everything needed to build
    the image: ``config.yml``, ``data.tgz``, the ``busybox`` and ``rpzsudo``
    binaries, ``rpz-files.list`` (NUL-separated list of tar members to
    extract), the ``Dockerfile`` itself and the ``.reprounzip`` metadata.

    Exits with status 1 if the target already exists, or if packages need to
    be installed but no package installer could be selected.

    :param args: parsed command-line arguments; this function reads `pack`,
    `target`, `base_image`, `distribution` and `install_pkgs`
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    # Base image: either given explicitly (optionally with the distribution
    # it contains) or selected automatically from the runs
    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    rpz_pack.copy_data_tar(target / 'data.tgz')

    # Architecture of the first run decides which helper binaries to download
    arch = runs[0]['architecture']

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8', newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(arch),
                      target / 'busybox',
                      'busybox-%s' % arch)
        fp.write('COPY busybox /busybox\n')

        # Installs rpzsudo
        download_file(sudo_url(arch),
                      target / 'rpzsudo',
                      'rpzsudo-%s' % arch)
        fp.write('COPY rpzsudo /rpzsudo\n\n')

        fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
        fp.write('COPY rpz-files.list /rpz-files.list\n')
        # Start of a single RUN instruction; every following command is
        # chained onto it with '&& \'
        fp.write('RUN \\\n'
                 '    chmod +x /busybox /rpzsudo && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            # `missing_packages` are the packed ones: they are not installed,
            # so their files have to be extracted from the tar below
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(docker_install_pkgs=True)
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed", len(packages))
        else:
            record_usage(docker_install_pkgs=False)

        # Untar
        # `paths` remembers every path already considered; `pathlist` keeps,
        # in discovery order, the ones that are actually present in the pack
        paths = set()
        pathlist = []
        # Adds intermediate directories, and checks for existence in the tar
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            # Walk down from the root so each parent directory is listed too
            for c in rpz_pack.remove_data_prefix(f.path).components:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                try:
                    rpz_pack.get_data(path)
                except KeyError:
                    logging.info("Missing file %s", path)
                else:
                    pathlist.append(path)
        rpz_pack.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        # File list is written as NUL-terminated entries, matching tar's
        # `--null -T` options used below
        with (target / 'rpz-files.list').open('wb') as lfp:
            for p in reversed(pathlist):
                lfp.write(join_root(rpz_pack.data_prefix, p).path)
                lfp.write(b'\0')
        # Last command of the RUN instruction: a tar failure only prints a
        # message instead of failing the build
        fp.write('    cd / && '
                 '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                 '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                 '/busybox echo "TAR reports errors, this might or might '
                 'not prevent the execution to run")\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target, pack=pack)
Beispiel #19
0
def run_interactive(ssh_info, interactive, cmd, request_pty, forwarded_ports):
    """Runs a command on an SSH server.

    If `interactive` is True, we'll try to find an ``ssh`` executable, falling
    back to paramiko if it's not found. The terminal handling code is a bit
    wonky, so using ``ssh`` is definitely a good idea, especially on Windows.
    Non-interactive commands should run fine.

    :param ssh_info: dict with `hostname`, `port`, `username`, `key_filename`,
    passed directly to paramiko
    :type ssh_info: dict
    :param interactive: whether to connect local input to the remote process
    :type interactive: bool
    :param cmd: command-line to run on the server
    :type cmd: str
    :param request_pty: whether to request a PTY from the SSH server
    :type request_pty: bool
    :param forwarded_ports: ports to forward back to us; iterable of pairs
    ``(port_number, connector)`` where `port_number` is the remote port number
    and `connector` is the connector object used to build the connected socket
    to forward to on this side
    :type forwarded_ports: collections.Iterable[(int, object)]
    :returns: whatever `interruptible_call` returns when the ``ssh``
    executable is used, otherwise the exit status reported by the SSH channel
    """
    if interactive:
        ssh_exe = find_ssh_executable()
    else:
        ssh_exe = None

    if interactive and ssh_exe:
        record_usage(vagrant_ssh='ssh')
        args = [ssh_exe,
                # -t forces PTY allocation, -T disables it
                '-t' if request_pty else '-T',
                '-o', 'StrictHostKeyChecking=no',  # Silently accept host keys
                '-o', 'UserKnownHostsFile=/dev/null',  # Don't store host keys
                '-i', ssh_info['key_filename'],
                '-p', '%d' % ssh_info['port']]
        for remote_port, connector in forwarded_ports:
            # Remote port will connect to a local port
            fwd = LocalForwarder(connector)
            args.append('-R%d:127.0.0.1:%d' % (remote_port, fwd.local_port))
        args.append('%s@%s' % (ssh_info['username'],
                               ssh_info['hostname']))
        args.append(cmd)
        return interruptible_call(args)

    else:
        record_usage(vagrant_ssh='interactive' if interactive else 'simple')
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(IgnoreMissingKey())
        # The client is closed in `finally` so that a failure while
        # connecting, forwarding or running the command doesn't leak the
        # connection
        try:
            # Connects to the machine
            ssh.connect(**ssh_info)

            # Starts forwarding; the list keeps the forwarders referenced
            # for the duration of the command
            forwarders = []
            for remote_port, connector in forwarded_ports:
                forwarders.append(
                    SSHForwarder(ssh.get_transport(), remote_port, connector))

            chan = ssh.get_transport().open_session()
            if request_pty:
                chan.get_pty()

            # Execute command
            logging.info("Connected via SSH, running command...")
            chan.exec_command(cmd)

            # Get output
            if interactive:
                interactive_shell(chan)
            else:
                # Nothing will be sent to the remote process; just copy what
                # it prints until the channel reports end-of-stream
                chan.shutdown_write()
                while True:
                    data = chan.recv(1024)
                    if len(data) == 0:
                        break
                    stdout_bytes.write(data)
                    stdout_bytes.flush()
            return chan.recv_exit_status()
        finally:
            ssh.close()
Beispiel #20
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path type is not POSIX. If anything fails once the root
    directory has been created, that directory is removed and the exception
    is re-raised.

    :param args: parsed command-line arguments; this function reads `pack`,
    `target` and `restore_owner`
    """
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning("According to configuration, some files were left "
                           "out because they belong to the following "
                           "packages:%s\nWill copy files from HOST SYSTEM",
                           ''.join('\n    %s' % pkg
                                   for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # Copies the host's file (or recreates the symlink) at
                    # the same location under the chroot's root
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # Key spelling is kept as-is: it is the name under which this
                # event is recorded
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extracted files will belong to the current user instead of the
            # owner recorded in the pack
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Gives the chroot the host's DNS configuration; this is best-effort,
        # a failure to copy is deliberately ignored
        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in config.inputs_outputs.values()
                       if f.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            # Context manager so the archive is closed even if adding one of
            # the files fails
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Beispiel #21
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path type is not POSIX. If anything fails once the root
    directory has been created, that directory is removed and the exception
    is re-raised.

    :param args: parsed command-line arguments; this function reads `pack`
    and `target`
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages: files of packages that were not packed are expected to
    # be found on the host
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                # Log the path itself rather than the file entry object
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f.path, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Re-roots absolute symlink targets under the unpack root, so
            # they point inside the unpacked tree instead of at the host
            if m.issym():
                linkname = PosixPath(m.linkname)
                # NOTE(review): `is_absolute` is read as an attribute, not
                # called -- presumably a property of this path class; confirm
                if linkname.is_absolute:
                    m.linkname = join_root(root, linkname).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            # Context manager so the archive is closed even if adding one of
            # the files fails
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Beispiel #22
0
def main():
    """Command-line entry point for reprounzip.

    Builds the argument parser (one subcommand per registered unpacker, plus
    ``usage_report``), parses the command line and dispatches to the selected
    subcommand. Always terminates through ``sys.exit()``: status 0 on success,
    status 2 (after printing the help) when no subcommand was selected or a
    `UsageError` was raised. Any other exception is reported then re-raised.
    """
    # Use the locale configured in the environment
    locale.setlocale(locale.LC_ALL, '')

    def add_options(opts):
        # Options shared by the main parser and every subcommand parser
        opts.add_argument('--version', action='version',
                          version="reprounzip version %s" % __version__)

    # Generic plugins only need to be initialized
    for _name, init_plugin, _descr, _descr_1 in get_plugins(
            'reprounzip.plugins'):
        init_plugin()

    # Top-level parser
    parser = RPUZArgumentParser(
        description="reprounzip is the ReproZip component responsible for "
                    "unpacking and reproducing an experiment previously "
                    "packed with reprozip",
        epilog="Please report issues to [email protected]")
    add_options(parser)
    parser.add_argument('-v', '--verbose', action='count', default=1,
                        dest='verbosity',
                        help="augments verbosity level")
    subparsers = parser.add_subparsers(title="subcommands", metavar='')

    # 'usage_report' subcommand
    usage_parser = subparsers.add_parser(
        'usage_report',
        help="Enables or disables anonymous usage reports")
    add_options(usage_parser)
    usage_parser.add_argument('--enable', action='store_true')
    usage_parser.add_argument('--disable', action='store_true')
    usage_parser.set_defaults(func=usage_report)

    # One subcommand per unpacker; each unpacker configures its own parser
    # and may hand back an info dictionary
    for name, setup_unpacker, descr, descr_1 in get_plugins(
            'reprounzip.unpackers'):
        unpacker_parser = subparsers.add_parser(
            name, help=descr_1, description=descr,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        add_options(unpacker_parser)
        unpacker_info = setup_unpacker(unpacker_parser)
        unpacker_parser.set_defaults(selected_unpacker=name)
        unpackers[name] = {} if unpacker_info is None else unpacker_info

    # Parses the command line, letting subscribers hook in before and after
    signals.pre_parse_args(parser=parser, subparsers=subparsers)
    args = parser.parse_args()
    signals.post_parse_args(args=args)

    # No subcommand selected: show help and bail out
    if getattr(args, 'func', None) is None:
        parser.print_help(sys.stderr)
        sys.exit(2)

    signals.unpacker = getattr(args, 'selected_unpacker', None)
    setup_logging('REPROUNZIP', args.verbosity)

    # Usage reporting
    setup_usage_report('reprounzip', __version__)
    if hasattr(args, 'selected_unpacker'):
        record_usage(unpacker=args.selected_unpacker)
    signals.pre_setup.subscribe(lambda **kw: record_usage(setup=True))
    signals.pre_run.subscribe(lambda **kw: record_usage(run=True))

    # The inner block handles reporting; usage errors skip it and are turned
    # into a help message by the outer block
    try:
        try:
            args.func(args)
        except UsageError:
            raise
        except Exception as exc:
            signals.application_finishing(reason=exc)
            submit_usage_report(result=type(exc).__name__)
            raise
        else:
            signals.application_finishing(reason=None)
    except UsageError:
        parser.print_help(sys.stderr)
        sys.exit(2)

    submit_usage_report(result='success')
    sys.exit(0)
Beispiel #23
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path type is not `PosixPath`. If anything fails once the
    root directory has been created, that directory is removed and the
    exception is re-raised.

    :param args: parsed command-line arguments; this function reads `pack`,
    `target` and `restore_owner`
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n    %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # Copies the host's file (or recreates the symlink) at
                    # the same location under the chroot's root
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # Key spelling is kept as-is: it is the name under which this
                # event is recorded
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extracted files will belong to the current user instead of the
            # owner recorded in the pack
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            # Context manager so the archive is closed even if adding one of
            # the files fails
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
Beispiel #24
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Exits with status 1 if no pack was given, if the target already exists,
    if the memory size is not an integer, or if packages need to be installed
    but no package installer could be selected. If anything fails once the
    target directory has been created, it is removed and the exception is
    re-raised.

    :param args: parsed command-line arguments; this function reads `pack`,
    `target`, `use_chroot`, `bind_magic_dirs`, `memory`, `base_image`,
    `distribution` and `gui`
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    # Memory size: the last value given on the command line wins
    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    # Box: either given explicitly (optionally with the distribution it
    # contains) or selected automatically from the runs
    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # Without an installer the setup script can't be generated; stop
            # here instead of failing later on the unbound `installer`
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                # Everything in the pack goes under /experimentroot
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        # The destination's directory is the one that might
                        # not exist yet; the script runs with `set -e` so a
                        # failing cp would abort the provisioning
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(dest.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                # `paths` remembers every path already considered; `pathlist`
                # keeps, in discovery order, the ones present in the pack
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    # The machine's own resolv.conf is never overwritten
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Provider settings (memory size, GUI), only if any was requested
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Beispiel #25
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    The configuration file is extracted to ``<target>/config.yml`` and every
    archive member under ``DATA/`` is extracted to ``<target>/root``.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload).

    :param args: parsed command-line arguments; ``args.pack`` and
    ``args.target`` are read, and the first element of each is used as a path

    Exits with status 1 if no pack was given, if the target already exists,
    if the default path type is not POSIX, or if the archive contains a member
    name with ``..`` in it or a leading ``/``.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # The archive stays open until all files are extracted; the try/finally
    # guarantees it is closed on every exit path, including sys.exit()
    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(
            target / 'config.yml', True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks packages: files of packages that were not packed have to be
        # provided by the host system
        missing_files = False
        for pkg in packages:
            if pkg.packfiles:
                continue
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s that wasn't "
                        "packed) on host, experiment will probably miss it.",
                        f, pkg.name)
                    missing_files = True
        if missing_files:
            record_usage(directory_missing_pkgs=True)
            logging.error(
                "Some packages are missing, you should probably install "
                "them.\nUse 'reprounzip installpkgs -h' for help")

        # Unpacks files
        all_members = tar.getmembers()
        # Refuses archives that could write outside of the target directory
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the leading 'DATA/'
            m.name = m.name[5:]
        # Re-roots absolute symlink targets so they point inside the
        # unpacked root instead of at the host filesystem
        for m in members:
            if not m.issym():
                continue
            linkname = PosixPath(m.linkname)
            # is_absolute is read as an attribute, not called -- presumably
            # a property of the project's path type; TODO confirm
            if linkname.is_absolute:
                m.linkname = join_root(root, linkname).path
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Gets library paths
    # NOTE: lib_dirs is collected here but not used later in this function
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Slicing (not indexing) keeps this a bytes comparison on both
            # Python 2 and 3; indexing bytes on Python 3 yields an int
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
Beispiel #26
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: parsed command-line arguments; reads ``pack``, ``target``,
    ``use_chroot``, ``bind_magic_dirs``, ``base_image`` and ``distribution``

    Exits with status 1 if no pack was given, if the target already exists,
    or if packages have to be installed and no installer can be selected.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s", len(packages), e)
            # Without an installer the setup script cannot be written;
            # continuing would fail later on the unbound `installer` name
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n' 'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            # Files of the packages installed in the VM are copied into the
            # chroot, since they were not part of the pack
            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (shell_escape(
                        unicode_(f)), shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            try:
                for f in other_files:
                    path = PosixPath('/')
                    for c in f.path.components[1:]:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        datapath = join_root(dataroot, path)
                        try:
                            tar.getmember(str(datapath))
                        except KeyError:
                            logging.info("Missing file %s", datapath)
                        else:
                            pathlist.append(unicode_(datapath))
            finally:
                tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Beispiel #27
0
def main():
    """Entry point when called on the command-line.

    Builds the argument parser (global options, the ``usage_report``
    subcommand and one subcommand per registered unpacker), parses the
    command line, sets up logging and usage reporting, then dispatches to the
    selected subcommand.

    Exits with status 0 on success and 2 when no subcommand was selected or a
    ``UsageError`` was raised; any other exception is reported and re-raised.
    """
    # Locale
    locale.setlocale(locale.LC_ALL, '')

    # Options shared by the top-level parser and every subcommand
    def add_options(opts):
        opts.add_argument(
            '--version', action='version',
            version="reprounzip version %s" % __version__)

    # General plugins are initialized before the parser is built
    for _name, init_plugin, _descr, _descr_1 in get_plugins(
            'reprounzip.plugins'):
        init_plugin()

    parser = RPUZArgumentParser(
        description="reprounzip is the ReproZip component responsible for "
        "unpacking and reproducing an experiment previously "
        "packed with reprozip",
        epilog="Please report issues to [email protected]")
    add_options(parser)
    parser.add_argument(
        '-v', '--verbose',
        action='count', default=1, dest='verbosity',
        help="augments verbosity level")
    subparsers = parser.add_subparsers(title="subcommands", metavar='')

    # usage_report subcommand
    parser_stats = subparsers.add_parser(
        'usage_report', help="Enables or disables anonymous usage reports")
    add_options(parser_stats)
    for flag in ('--enable', '--disable'):
        parser_stats.add_argument(flag, action='store_true')
    parser_stats.set_defaults(func=usage_report)

    # One subcommand per unpacker; whatever the unpacker's setup function
    # returns is stored in the `unpackers` mapping (an empty dict for None)
    for name, setup_parser, descr, descr_1 in get_plugins(
            'reprounzip.unpackers'):
        plugin_parser = subparsers.add_parser(
            name,
            help=descr_1,
            description=descr,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        add_options(plugin_parser)
        info = setup_parser(plugin_parser)
        plugin_parser.set_defaults(selected_unpacker=name)
        unpackers[name] = {} if info is None else info

    signals.pre_parse_args(parser=parser, subparsers=subparsers)
    args = parser.parse_args()
    signals.post_parse_args(args=args)
    if getattr(args, 'func', None) is None:
        # No subcommand was selected
        parser.print_help(sys.stderr)
        sys.exit(2)
    signals.unpacker = getattr(args, 'selected_unpacker', None)
    setup_logging('REPROUNZIP', args.verbosity)

    setup_usage_report('reprounzip', __version__)
    if hasattr(args, 'selected_unpacker'):
        record_usage(unpacker=args.selected_unpacker)
    signals.pre_setup.subscribe(lambda **kw: record_usage(setup=True))
    signals.pre_run.subscribe(lambda **kw: record_usage(run=True))

    def run_command():
        # UsageError passes through untouched; any other failure is
        # signalled and reported before being re-raised
        try:
            args.func(args)
        except UsageError:
            raise
        except Exception as err:
            signals.application_finishing(reason=err)
            submit_usage_report(result=type(err).__name__)
            raise
        else:
            signals.application_finishing(reason=None)

    try:
        run_command()
    except UsageError:
        parser.print_help(sys.stderr)
        sys.exit(2)
    submit_usage_report(result='success')
    sys.exit(0)
Beispiel #28
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with the files needed for a
    Docker build: ``config.yml``, ``data.tgz``, the ``busybox`` and
    ``rpzsudo`` binaries, ``rpz-files.list`` and the ``Dockerfile``.

    :param args: parsed command-line arguments; reads ``pack``, ``target``,
    ``base_image``, ``distribution`` and ``install_pkgs``

    Exits with status 1 if the target already exists, or if packages have to
    be installed and no installer can be selected. If an exception is raised
    after the target directory was created, the directory is removed and the
    exception is re-raised.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        # `config` keeps the whole tuple, it is passed to
        # metadata_initial_iofiles() at the end
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        # Architecture of the first run selects the helper binaries
        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch), target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch), target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            # Starts a single RUN instruction; the following writes append
            # commands to it, chained with '&&'
            fp.write('RUN \\\n' '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s", len(packages), e)
                    # SystemExit is not an Exception subclass, so the
                    # cleanup handler at the end of this function does not
                    # remove the target directory on this path
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    # Each path (including parent directories) is listed
                    # only once
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The list of files to extract is written NUL-separated, to be
            # read by tar's --null -T options below
            # NOTE(review): rpz_pack.data_prefix is read after
            # rpz_pack.close() -- presumably a plain attribute that stays
            # valid once the pack is closed; confirm
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # Errors reported by tar are not fatal to the build: a message
            # is echoed instead
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Does not leave a partially set up directory behind
        target.rmtree(ignore_errors=True)
        raise
Beispiel #29
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts ``config.yml`` from the pack into the target directory, copies
    the pack there as ``experiment.rpz``, downloads busybox, and writes a
    ``Dockerfile`` that installs the required packages (``sudo`` is always
    added) and untars the selected files from the pack.

    :param args: parsed command-line arguments; reads ``pack``, ``target``,
    ``base_image``, ``distribution`` and ``install_pkgs``

    Exits with status 1 if the target already exists or if no package
    installer can be selected.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Pulls the configuration file out of the pack
    archive = tarfile.open(str(pack), 'r:*')
    config_member = archive.getmember('METADATA/config.yml')
    config_member.name = 'config.yml'
    archive.extract(config_member, str(target))
    archive.close()

    # Reads the configuration
    runs, packages, other_files = load_config(target / 'config.yml', True)

    # Base image: either given explicitly or chosen from the runs
    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        target_distribution = (args.distribution[0] if args.distribution
                               else None)
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Generates the Dockerfile
    dockerfile_path = target / 'Dockerfile'
    logging.info("Writing %s...", dockerfile_path)
    with dockerfile_path.open('w', encoding='utf-8',
                              newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % base_image)

        # busybox is downloaded next to the Dockerfile and copied in
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        dockerfile.write('COPY busybox /bin/busybox\n')

        dockerfile.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        dockerfile.write('RUN \\\n'
                         '    chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Everything goes through the package manager
            missing_packages = []
        else:
            # Packed packages have their files extracted from the pack;
            # only the others are installed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        record_usage(docker_install_pkgs=True if packages else "sudo")
        packages += [Package('sudo', None, packfiles=False)]
        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as err:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), err)
                sys.exit(1)
            # Refreshes package sources, then installs what is needed
            dockerfile.write('    %s && \\\n' % installer.update_script())
            dockerfile.write('    %s && \\\n' %
                             installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        seen = set()
        to_extract = []
        dataroot = PosixPath('DATA')
        # Every file is listed together with its parent directories, each
        # only once and only if it is present in the pack
        archive = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for entry in chain(other_files, missing_files):
            path = PosixPath('/')
            for component in entry.path.components[1:]:
                path = path / component
                if path not in seen:
                    seen.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        archive.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                        continue
                    to_extract.append(unicode_(datapath))
        archive.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        quoted = [shell_escape(p) for p in reversed(to_extract)]
        dockerfile.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                         '--numeric-owner --strip=1 %s\n' %
                         ' '.join(quoted))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
Beispiel #30
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    The configuration file is extracted to ``<target>/config.yml`` and every
    archive member under ``DATA/`` is extracted to ``<target>/root``.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload).

    :param args: parsed command-line arguments; ``args.pack`` and
    ``args.target`` are read, and the first element of each is used as a path

    Exits with status 1 if no pack was given, if the target already exists,
    if the default path type is not POSIX, or if the archive contains a member
    name with ``..`` in it or a leading ``/``.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # The archive stays open until all files are extracted; the try/finally
    # guarantees it is closed on every exit path, including sys.exit()
    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(
            target / 'config.yml', True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks packages: files of packages that were not packed have to be
        # provided by the host system
        missing_files = False
        for pkg in packages:
            if pkg.packfiles:
                continue
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s that wasn't "
                        "packed) on host, experiment will probably miss it.",
                        f, pkg.name)
                    missing_files = True
        if missing_files:
            record_usage(directory_missing_pkgs=True)
            logging.error(
                "Some packages are missing, you should probably install "
                "them.\nUse 'reprounzip installpkgs -h' for help")

        # Unpacks files
        all_members = tar.getmembers()
        # Refuses archives that could write outside of the target directory
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the leading 'DATA/'
            m.name = m.name[5:]
        # Re-roots absolute symlink targets so they point inside the
        # unpacked root instead of at the host filesystem
        for m in members:
            if not m.issym():
                continue
            linkname = PosixPath(m.linkname)
            # is_absolute is read as an attribute, not called -- presumably
            # a property of the project's path type; TODO confirm
            if linkname.is_absolute:
                m.linkname = join_root(root, linkname).path
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Gets library paths
    # NOTE: lib_dirs is collected here but not used later in this function
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Slicing (not indexing) keeps this a bytes comparison on both
            # Python 2 and 3; indexing bytes on Python 3 yields an int
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
Beispiel #31
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: parsed command-line arguments; reads ``pack``, ``target``,
    ``use_chroot``, ``bind_magic_dirs``, ``base_image`` and ``distribution``

    Exits with status 1 if no pack was given, if the target already exists,
    or if packages have to be installed and no installer can be selected.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # Without an installer the setup script cannot be written;
            # continuing would fail later on the unbound `installer` name
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            # Files of the packages installed in the VM are copied into the
            # chroot, since they were not part of the pack
            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            try:
                for f in other_files:
                    path = PosixPath('/')
                    for c in f.path.components[1:]:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        datapath = join_root(dataroot, path)
                        try:
                            tar.getmember(str(datapath))
                        except KeyError:
                            logging.info("Missing file %s", datapath)
                        else:
                            pathlist.append(unicode_(datapath))
            finally:
                tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Beispiel #32
0
def run_interactive(ssh_info, interactive, cmd, request_pty, forwarded_ports):
    """Runs a command on an SSH server.

    If `interactive` is True, we'll try to find an ``ssh`` executable, falling
    back to paramiko if it's not found. The terminal handling code is a bit
    wonky, so using ``ssh`` is definitely a good idea, especially on Windows.
    Non-interactive commands should run fine.

    When paramiko is used, the connection is closed before this function
    returns, whether the command completed or an error interrupted it.

    :param ssh_info: dict with `hostname`, `port`, `username`, `key_filename`,
    passed directly to paramiko
    :type ssh_info: dict
    :param interactive: whether to connect local input to the remote process
    :type interactive: bool
    :param cmd: command-line to run on the server
    :type cmd: basestring
    :param request_pty: whether to request a PTY from the SSH server
    :type request_pty: bool
    :param forwarded_ports: ports to forward back to us; iterable of pairs
    ``(port_number, connector)`` where `port_number` is the remote port number
    and `connector` is the connector object used to build the connected socket
    to forward to on this side
    :returns: whatever `interruptible_call()` returns when the ``ssh``
    executable is used (presumably its exit code), otherwise the exit status
    reported by the SSH channel
    """
    # Only bother looking for an external client for interactive sessions
    ssh_exe = find_ssh_executable() if interactive else None

    if interactive and ssh_exe:
        record_usage(vagrant_ssh='ssh')
        args = [
            ssh_exe,
            '-t' if request_pty else '-T',  # Force allocation of PTY
            '-o',
            'StrictHostKeyChecking=no',  # Silently accept host keys
            '-o',
            'UserKnownHostsFile=/dev/null',  # Don't store host keys
            '-i',
            ssh_info['key_filename'],
            '-p',
            '%d' % ssh_info['port']
        ]
        for remote_port, connector in forwarded_ports:
            # Remote port will connect to a local port
            fwd = LocalForwarder(connector)
            args.append('-R%d:127.0.0.1:%d' % (remote_port, fwd.local_port))
        args.append('%s@%s' % (ssh_info['username'], ssh_info['hostname']))
        args.append(cmd)
        return interruptible_call(args)

    record_usage(vagrant_ssh='interactive' if interactive else 'simple')
    # Connects to the machine
    ssh = paramiko.SSHClient()
    try:
        ssh.set_missing_host_key_policy(IgnoreMissingKey())
        ssh.connect(**ssh_info)

        # Starts forwarding
        # The list is never read back; presumably it only keeps the forwarder
        # objects referenced while the command runs
        forwarders = []
        for remote_port, connector in forwarded_ports:
            forwarders.append(
                SSHForwarder(ssh.get_transport(), remote_port, connector))

        chan = ssh.get_transport().open_session()
        if request_pty:
            chan.get_pty()

        # Execute command
        logging.info("Connected via SSH, running command...")
        chan.exec_command(cmd)

        # Get output
        if interactive:
            interactive_shell(chan)
        else:
            # We have nothing to send: close our side, then relay the remote
            # output as raw bytes until the channel reports end-of-stream
            chan.shutdown_write()
            while True:
                data = chan.recv(1024)
                if not data:
                    break
                sys.stdout.buffer.write(data)
                sys.stdout.flush()
        return chan.recv_exit_status()
    finally:
        # Release the connection on errors too, not only on success
        ssh.close()
Beispiel #33
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with everything needed to build
    the image: the extracted ``config.yml``, the pack's data as ``data.tgz``,
    the ``busybox`` and ``rpzsudo`` binaries, a ``Dockerfile``, the
    ``rpz-files.list`` list of files to extract from the data archive, and the
    ``rpz_entrypoint.sh`` script used as the image's entry point.

    :param args: parsed command-line arguments; this function reads
    ``args.pack[0]``, ``args.target[0]``, ``args.base_image``,
    ``args.distribution`` and ``args.install_pkgs``

    Exits with status 1 if the target directory already exists, or if packages
    have to be installed and no package installer could be selected. If an
    exception is raised once the target directory has been created, the
    directory is removed and the exception is re-raised.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        # The chained assignment keeps the whole loaded object as `config`
        # (used for the metadata at the end) and also unpacks its three parts
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        # An explicitly requested base image takes precedence; in that case
        # the distribution is only known if it was given as well
        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        # The architecture of the first run selects which helper binaries to
        # fetch
        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            # Start of a single RUN instruction; each following step is
            # appended to it with '&& \' so they all run in the same command
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            # After this branch, `packages` holds what the package manager
            # will install, and `missing_packages` holds the packages whose
            # files are taken from the data archive instead
            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    # NOTE: sys.exit() raises SystemExit, which is not an
                    # Exception subclass, so the cleanup handler at the end of
                    # this function does not remove the target directory here
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            # `paths` remembers every path already considered, `pathlist`
            # keeps (in discovery order) the ones actually present in the
            # archive
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # resolv.conf files under /etc, /run or /var are never
                # extracted (presumably so the container keeps its own DNS
                # configuration -- confirm)
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                # Walk down from the root one component at a time, so that
                # each parent directory is listed before the file itself
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            # NOTE(review): `rpz_pack.data_prefix` is still read below, after
            # close(); presumably it is a plain attribute that stays valid --
            # confirm
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The list is written NUL-separated, matching tar's --null option
            # used in the command below
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # Last step of the RUN instruction: a tar failure only prints a
            # message, it does not fail the image build
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            # "env STR" sets extra environment variables for the next run
            # "help" prints the usage and the list of runs, then exits
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            # Without arguments, the script re-executes itself with one
            # "run N" argument per run
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            # One help line per run, showing its id and original command-line
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            # One case arm per run, matching either its id or its index, that
            # sets the command, working directory, environment and uid/gid
            # (uid and gid default to 1000 when the run does not record them)
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
Beispiel #34
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    :param args: parsed command-line arguments; this function reads
    ``args.pack``, ``args.target[0]`` and ``args.restore_owner``

    Exits with status 1 if no pack was given, if the target already exists, if
    the default path type is not `PosixPath`, or if the archive contains a
    member name that is absolute or contains ``..``. The pack archive is closed
    in all cases, including those early exits and errors during extraction.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning(
                "According to configuration, some files were left out "
                "because they belong to the following packages:%s"
                "\nWill copy files from HOST SYSTEM",
                ''.join('\n    %s' % pkg for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    f = Path(f.path)
                    if not f.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it", f, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, f)
                    dest.parent.mkdir(parents=True)
                    # Links are recreated as links, not followed
                    if f.is_link():
                        dest.symlink(f.read_link())
                    else:
                        f.copy(dest)
                    if restore_owner:
                        stat = f.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # The keyword's spelling is part of the recorded data; it is
                # kept exactly as it was
                record_usage(chroot_mising_files=True)

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives that could write outside of the target directory
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the 'DATA/' prefix
            m.name = m.name[5:]
        if not restore_owner:
            # Files will belong to the current user instead of the owner
            # recorded in the archive
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']), busybox_path)
            busybox_path.chmod(0o755)
            # The link targets are paths as seen from inside the chroot
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    # Stored under the path the file had in the experiment
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
Beispiel #35
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    :param args: parsed command-line arguments; this function reads
    ``args.pack``, ``args.target[0]`` and ``args.restore_owner``

    Exits with status 1 if no pack was given, if the target already exists, if
    the default path type is not `PosixPath`, or if the archive contains a
    member name that is absolute or contains ``..``. The pack archive is closed
    in all cases, including those early exits and errors during extraction.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning(
                "According to configuration, some files were left out "
                "because they belong to the following packages:%s"
                "\nWill copy files from HOST SYSTEM",
                ''.join('\n    %s' % pkg for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    f = Path(f.path)
                    if not f.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it", f, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, f)
                    dest.parent.mkdir(parents=True)
                    # Links are recreated as links, not followed
                    if f.is_link():
                        dest.symlink(f.read_link())
                    else:
                        f.copy(dest)
                    if restore_owner:
                        stat = f.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # The keyword's spelling is part of the recorded data; it is
                # kept exactly as it was
                record_usage(chroot_mising_files=True)

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives that could write outside of the target directory
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the 'DATA/' prefix
            m.name = m.name[5:]
        if not restore_owner:
            # Files will belong to the current user instead of the owner
            # recorded in the archive
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            # The link targets are paths as seen from inside the chroot
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    # Stored under the path the file had in the experiment
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
Beispiel #36
0
def directory_create(args):
    """Extracts a pack's data files into a ``root`` folder under the target.

    Files belonging to packages that were not packed are not copied; they are
    only looked up on the host, and an error is logged for each one that is
    missing.

    The configuration file is extracted next to ``root``, and the original
    input files are saved in ``inputs.tar.gz`` so that they can be restored
    after an upload.

    :param args: parsed command-line arguments; this function reads
    ``args.pack`` and ``args.target[0]``

    Exits with status 1 if no pack was given, if the target already exists, or
    if the default path type is not a `PosixPath`. If extraction fails with an
    exception, the ``root`` folder is removed and the exception is re-raised.
    """
    if not args.pack:
        logger.critical("setup needs the pack filename")
        sys.exit(1)

    pack, target = Path(args.pack[0]), Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Pull the configuration out of the pack, then load it
    rpz_pack = RPZPack(pack)
    config_path = target / 'config.yml'
    rpz_pack.extract_config(config_path)
    config = load_config_file(config_path, True)

    target.mkdir()
    root = (target / 'root').absolute()

    # Files of packages that were left out of the pack must come from the host
    found_missing = False
    for pkg in config.packages:
        if not pkg.packfiles:
            for pkg_file in pkg.files:
                if Path(pkg_file.path).exists():
                    continue
                logger.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    pkg_file, pkg.name)
                found_missing = True
    if found_missing:
        record_usage(directory_missing_pkgs=True)
        logger.error("Some packages are missing, you should probably install "
                     "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Rewrite the archive members so they land directly under `root`
        members = rpz_pack.list_data()
        for member in members:
            # Drops the data prefix from the member's name
            member.name = str(rpz_pack.remove_data_prefix(member.name))
            if not member.issym():
                continue
            # Absolute link targets are re-rooted under the unpacked tree, so
            # they don't point at the host's own files
            link_target = PosixPath(member.linkname)
            if link_target.is_absolute:
                member.linkname = join_root(root, link_target).path
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Keep a copy of the original input files, so upload can restore them
        originals = []
        for iofile in config.inputs_outputs.values():
            if iofile.read_runs:
                originals.append(iofile.path)
        if originals:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for original in originals:
                unpacked = join_root(root, original)
                # Input files that are not in the unpacked tree are skipped
                if unpacked.exists():
                    inputtar.add(str(unpacked), str(original))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Only the unpacked tree is removed before the error propagates
        rmtree_fixed(root)
        raise
Beispiel #37
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with everything needed to
    build the image: the configuration file (``config.yml``), a copy of the
    pack's data archive (``data.tgz``), the ``busybox`` and ``rpzsudo``
    binaries, the ``Dockerfile``, the list of files to extract
    (``rpz-files.list``), the ``rpz_entrypoint.sh`` script and the
    reprounzip meta-data.

    The following attributes of `args` are read: ``pack[0]``, ``target[0]``,
    ``base_image``, ``distribution`` and ``install_pkgs``.

    The process exits with status 1 if the target directory already exists,
    or if packages have to be installed but no installer can be selected.
    If anything fails once the target directory has been created, that
    directory is removed again before the error propagates.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        # Deliberately outside of the try block below: a directory that
        # already existed must never be removed by our cleanup
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            # Base image explicitly requested; the distribution is only known
            # if it was given as well
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        # The architecture of the first run selects which helper binaries
        # get downloaded
        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            # Start of a single RUN instruction; each following step is
            # appended to it with '&& \'
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed; the files of
                # the packed ones are extracted from the data archive below
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # resolv.conf is never extracted
                # NOTE(review): presumably so that the container keeps the
                # one provided by Docker -- confirm
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                # Walks down from the root so that every parent directory is
                # listed (once) before the file itself
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The list is NUL-separated, matching tar's --null option below
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # Last step of the RUN instruction; tar errors are reported but
            # do not fail the build
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "env STR" sets additional environment variables for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            # Without arguments, the script re-executes itself with one
            # "run N" argument per run
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            # One line of help per run
            # NOTE(review): the escaped arguments end up inside a
            # double-quoted echo; if shell_escape() emits double quotes they
            # would close that string early -- confirm the help output for
            # arguments containing spaces
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            # One case per run, selectable through either its id or its index
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except BaseException:
        # BaseException rather than Exception: the sys.exit(1) above raises
        # SystemExit (and Ctrl-C raises KeyboardInterrupt), neither of which
        # is an Exception. Without this, a half-built target directory would
        # be left behind and the next attempt would stop on "Target directory
        # exists". The original exception is re-raised unchanged.
        target.rmtree(ignore_errors=True)
        raise