Exemple #1
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        runs, packages, other_files = load_config_file(pack / 'config.yml',
                                                       canonical=True)
        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        input_files = unpacked_info.get('input_files', {})

        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for input_name, path in iteritems(run['input_files']):
                print("    %s (%s)" % (input_name, path))
                if input_files.get(input_name) is not None:
                    assigned = PosixPath(input_files[input_name])
                else:
                    assigned = "(original)"
                print("      %s" % assigned)

        print("Output files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for output_name, path in iteritems(run['output_files']):
                print("    %s (%s)" % (output_name, path))

    else:  # pack.is_file()
        # Reads info from a pack file
        runs, packages, other_files = load_config(pack)

        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for input_name, path in iteritems(run['input_files']):
                print("    %s (%s)" % (input_name, path))

        print("Output files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print("  Run %d:" % i)
            for output_name, path in iteritems(run['output_files']):
                print("    %s (%s)" % (output_name, path))
Exemple #2
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml',
                                  canonical=True)
        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        assigned_input_files = unpacked_info.get('input_files', {})

        print("Input files:")
        for input_name, f in iteritems(config.inputs_outputs):
            if not f.read_runs:
                continue
            print("    %s (%s)" % (input_name, f.path))
            if assigned_input_files.get(input_name) is not None:
                assigned = assigned_input_files[input_name]
            else:
                assigned = "(original)"
            print("      %s" % assigned)

        print("Output files:")
        for output_name, f in iteritems(config.inputs_outputs):
            if f.write_runs:
                print("    %s (%s)" % (output_name, f.path))

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        print("Input files:")
        for input_name, f in iteritems(config.inputs_outputs):
            if f.read_runs:
                print("    %s (%s)" % (input_name, f.path))

        print("Output files:")
        for output_name, f in iteritems(config.inputs_outputs):
            if f.write_runs:
                print("    %s (%s)" % (output_name, f.path))
def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the {in/out}put files to the unpacker metadata.

    :param config: The configuration as returned by `load_config()`, which will
    be used to list the input and output files and to determine which ones have
    been packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}

    path2iofile = {f.path: n for n, f in iteritems(config.inputs_outputs)}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))

    return dct
Exemple #4
0
    def __call__(self, **kwargs):
        info = {}
        for arg, argtype in iteritems(self._args):
            if argtype == Signal.REQUIRED:
                try:
                    info[arg] = kwargs.pop(arg)
                except KeyError:
                    warnings.warn(
                        "signal: Missing required argument %s; " "signal ignored" % arg,
                        category=SignalWarning,
                        stacklevel=2,
                    )
                    return
            else:
                if arg in kwargs:
                    info[arg] = kwargs.pop(arg)
                    if argtype == Signal.DEPRECATED:
                        warnings.warn("signal: Argument %s is deprecated" % arg, category=SignalWarning, stacklevel=2)
        if kwargs:
            arg = next(iter(kwargs))
            warnings.warn("signal: Unexpected argument %s; signal ignored" % arg, category=SignalWarning, stacklevel=2)
            return

        for listener in self._listeners:
            try:
                listener(**info)
            except Exception:
                traceback.print_exc()
                warnings.warn("signal: Got an exception calling a signal", category=SignalWarning)
Exemple #5
0
def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the input files to the unpacker metadata.

    :param config: The configuration as returned by `load_config()`, which will
    be used to list the input files and to determine which ones have been
    packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}

    path2iofile = {f.path: n
                   for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))

    return dct
Exemple #6
0
    def __call__(self, **kwargs):
        info = {}
        for arg, argtype in iteritems(self._args):
            if argtype == Signal.REQUIRED:
                try:
                    info[arg] = kwargs.pop(arg)
                except KeyError:
                    warnings.warn("signal: Missing required argument %s; "
                                  "signal ignored" % arg,
                                  category=SignalWarning,
                                  stacklevel=2)
                    return
            else:
                if arg in kwargs:
                    info[arg] = kwargs.pop(arg)
                    if argtype == Signal.DEPRECATED:
                        warnings.warn("signal: Argument %s is deprecated" %
                                      arg,
                                      category=SignalWarning,
                                      stacklevel=2)
        if kwargs:
            arg = next(iter(kwargs))
            warnings.warn("signal: Unexpected argument %s; signal ignored" %
                          arg,
                          category=SignalWarning,
                          stacklevel=2)
            return

        for listener in self._listeners:
            try:
                listener(**info)
            except Exception:
                traceback.print_exc()
                warnings.warn("signal: Got an exception calling a signal",
                              category=SignalWarning)
Exemple #7
0
def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'chroot')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec, shell_escape(unicode_(root)), shell_escape(cmd))
        cmds.append(cmd)
    cmds = [
        'chroot %s /bin/sh -c %s' %
        (shell_escape(unicode_(root)), shell_escape(c)) for c in x11.init_cmds
    ] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'chroot')
Exemple #8
0
def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
                userspec,
                shell_escape(unicode_(root)),
                shell_escape(cmd))
        cmds.append(cmd)
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Exemple #9
0
def docker_setup_build(args):
    """Builds the container from the Dockerfile
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    if 'initial_image' in unpacked_info:
        logger.critical("Image already built")
        sys.exit(1)

    if args.image_name:
        image = args.image_name[0]
        if not isinstance(image, bytes):
            image = image.encode('ascii')
    else:
        image = make_unique_name(b'reprounzip_image_')

    logger.info("Calling 'docker build'...")
    try:
        retcode = subprocess.call(args.docker_cmd.split() + ['build', '-t'] +
                                  args.docker_option + [image, '.'],
                                  cwd=target.path)
    except OSError:
        logger.critical("docker executable not found")
        sys.exit(1)
    else:
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)
    logger.info("Initial image created: %s", image.decode('ascii'))

    unpacked_info['initial_image'] = image
    unpacked_info['current_image'] = image
    if 'DOCKER_MACHINE_NAME' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'docker-machine',
            'name': os.environ['DOCKER_MACHINE_NAME']
        }
    elif 'DOCKER_HOST' in os.environ:
        unpacked_info['docker_host'] = {
            'type':
            'custom',
            'env':
            dict((k, v) for k, v in iteritems(os.environ)
                 if k.startswith('DOCKER_'))
        }

    write_dict(target, unpacked_info)
def docker_setup_build(args):
    """Builds the container from the Dockerfile
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    if 'initial_image' in unpacked_info:
        logger.critical("Image already built")
        sys.exit(1)

    if args.image_name:
        image = args.image_name[0]
        if not isinstance(image, bytes):
            image = image.encode('ascii')
    else:
        image = make_unique_name(b'reprounzip_image_')

    logger.info("Calling 'docker build'...")
    try:
        retcode = subprocess.call(args.docker_cmd.split() + ['build', '-t'] +
                                  args.docker_option + [image, '.'],
                                  cwd=target.path)
    except OSError:
        logger.critical("docker executable not found")
        sys.exit(1)
    else:
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)
    logger.info("Initial image created: %s", image.decode('ascii'))

    unpacked_info['initial_image'] = image
    unpacked_info['current_image'] = image
    if 'DOCKER_MACHINE_NAME' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'docker-machine',
            'name': os.environ['DOCKER_MACHINE_NAME']}
    elif 'DOCKER_HOST' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'custom',
            'env': dict((k, v)
                        for k, v in iteritems(os.environ)
                        if k.startswith('DOCKER_'))}

    write_dict(target, unpacked_info)
Exemple #11
0
def metadata_update_run(config, dct, runs):
    """Update the unpacker metadata after some runs have executed.

    :param runs: An iterable of run numbers that were probably executed.

    This maintains a crude idea of the status of input files by updating the
    files that are outputs of the runs that were just executed. This means that
    files that were uploaded by the user will no longer be shown as uploaded
    (they have been overwritten by the experiment) and files that weren't
    packed exist from now on.

    This is not very reliable because a run might have created a file that is
    not designated as its output anyway, or might have failed and thus not
    created the output (or a bad output).
    """
    runs = set(runs)
    input_files = dct.setdefault('input_files', {})

    for name, fi in iteritems(config.inputs_outputs):
        if fi.read_runs and any(r in runs for r in fi.write_runs):
            input_files[name] = True
Exemple #12
0
def metadata_update_run(config, dct, runs):
    """Update the unpacker metadata after some runs have executed.

    :param runs: An iterable of run numbers that were probably executed.

    This maintains a crude idea of the status of input files by updating the
    files that are outputs of the runs that were just executed. This means that
    files that were uploaded by the user will no longer be shown as uploaded
    (they have been overwritten by the experiment) and files that weren't
    packed exist from now on.

    This is not very reliable because a run might have created a file that is
    not designated as its output anyway, or might have failed and thus not
    created the output (or a bad output).
    """
    runs = set(runs)
    input_files = dct.setdefault('input_files', {})

    for name, fi in iteritems(config.inputs_outputs):
        if fi.read_runs and any(r in runs for r in fi.write_runs):
            input_files[name] = True
Exemple #13
0
    def run(self, files):
        reprounzip.common.record_usage(download_files=len(files))
        output_files = dict(
            (n, f.path)
            for n, f in iteritems(self.get_config().inputs_outputs)
            if f.write_runs)

        # No argument: list all the output files and exit
        if not files:
            print("Output files:")
            for output_name in output_files:
                print("    %s" % output_name)
            return

        self.prepare_download(files)

        try:
            # Download files
            for filespec in files:
                filespec_split = filespec.split(':', 1)
                if len(filespec_split) != 2:
                    logging.critical("Invalid file specification: %r",
                                     filespec)
                    sys.exit(1)
                output_name, local_path = filespec_split

                try:
                    remote_path = output_files[output_name]
                except KeyError:
                    logging.critical("Invalid output file: %r", output_name)
                    sys.exit(1)

                logging.debug("Downloading file %s", remote_path)
                if not local_path:
                    self.download_and_print(remote_path)
                else:
                    self.download(remote_path, Path(local_path))
        finally:
            self.finalize()
Exemple #14
0
def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec, shell_escape(unicode_(root)), shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Exemple #15
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' sh -c %s\n' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'run', b'--name=' + container,
                               '-i', '-t', image,
                               '/bin/sh', '-c', cmds])
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #16
0
def directory_run(args):
    """Runs the command in the directory.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' %
        ':'.join(shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ) if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [
            PosixPath(d) for d in run['environ'].get('PATH', '').split(':')
        ]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                        (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logger.warning("Rewrote command-line as: %s",
                               ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
Exemple #17
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map,
               level_pkgs, level_processes, level_other_files):
    """Writes a JSON file suitable for further processing.
    """
    # Packages
    if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = []
    else:
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    done_edges = set()
    for prog, fi, mode, argv in edges:
        endp_prog = prog_map[prog]
        if fi in package_map:
            if level_pkgs == LVL_PKG_DROP:
                continue
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            e = endp_prog['name'], endp_file, mode
            if e in done_edges:
                continue
            else:
                done_edges.add(e)
        else:
            endp_file = unicode_(fi)
        if mode is None:
            endp_prog['reads'].append(endp_file)
        elif mode & FILE_WRITE:
            endp_prog['writes'].append(endp_file)
        elif mode & FILE_READ:
            endp_prog['reads'].append(endp_file)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        json.dump({'packages': sorted(json_packages,
                                      key=lambda p: p['name']),
                   'other_files': json_other_files,
                   'runs': json_runs,
                   'inputs_outputs': [
                       {'name': k, 'path': unicode_(v.path),
                        'read_by_runs': v.read_runs,
                        'written_by_runs': v.write_runs}
                       for k, v in sorted(iteritems(inputs_outputs))]},
                  fp,
                  ensure_ascii=False,
                  indent=2,
                  sort_keys=True)
    finally:
        fp.close()
Exemple #18
0
def run_from_vistrails():
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    cli_version = 1
    if len(sys.argv) > 1:
        try:
            cli_version = int(sys.argv[1])
        except ValueError:
            logging.info("Compatibility mode: reprounzip-vistrails didn't get "
                         "a version number")
    if cli_version != 1:
        logging.critical("Unknown interface version %d; you are probably "
                         "using a version of reprounzip-vistrails too old for "
                         "your VisTrails package. Consider upgrading.",
                         cli_version)
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args(sys.argv[2:])

    config = load_config(Path(args.directory) / 'config.yml', canonical=True)

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True,
                                  cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst,
                                  cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that are used by this run and were not given
    for name, f in iteritems(config.inputs_outputs):
        if name not in seen_input_names and int(args.run) in f.read_runs:
            upload_command.append(':%s' % name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', args.run, '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.', args.run])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.',
             '%s:%s' % (output_name, filename)])
Exemple #19
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map, level_pkgs, level_processes,
               level_other_files):
    """Writes a JSON file suitable for further processing.
    """
    # Packages
    if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = []
    else:
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    done_edges = set()
    for prog, fi, mode, argv in edges:
        endp_prog = prog_map[prog]
        if fi in package_map:
            if level_pkgs == LVL_PKG_DROP:
                continue
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            e = endp_prog['name'], endp_file, mode
            if e in done_edges:
                continue
            else:
                done_edges.add(e)
        else:
            endp_file = unicode_(fi)
        if mode is None:
            endp_prog['reads'].append(endp_file)
            # TODO: argv?
        elif mode & FILE_WRITE:
            endp_prog['writes'].append(endp_file)
        elif mode & FILE_READ:
            endp_prog['reads'].append(endp_file)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        json.dump(
            {
                'packages':
                sorted(json_packages, key=lambda p: p['name']),
                'other_files':
                json_other_files,
                'runs':
                json_runs,
                'inputs_outputs': [{
                    'name': k,
                    'path': unicode_(v.path),
                    'read_by_runs': v.read_runs,
                    'written_by_runs': v.write_runs
                } for k, v in sorted(iteritems(inputs_outputs))]
            },
            fp,
            ensure_ascii=False,
            indent=2,
            sort_keys=True)
    finally:
        fp.close()
Exemple #20
0
    def run(self, files):
        reprounzip.common.record_usage(upload_files=len(files))
        inputs_outputs = self.get_config().inputs_outputs

        # No argument: list all the input files and exit
        if not files:
            print("Input files:")
            for input_name in sorted(n for n, f in iteritems(inputs_outputs)
                                     if f.read_runs):
                assigned = self.input_files.get(input_name)
                if assigned is None:
                    assigned = "(original)"
                elif assigned is False:
                    assigned = "(not created)"
                elif assigned is True:
                    assigned = "(generated)"
                else:
                    assert isinstance(assigned, (bytes, unicode_))
                print("    %s: %s" % (input_name, assigned))
            return

        self.prepare_upload(files)

        try:
            # Upload files
            for filespec in files:
                filespec_split = filespec.rsplit(':', 1)
                if len(filespec_split) != 2:
                    logging.critical("Invalid file specification: %r",
                                     filespec)
                    sys.exit(1)
                local_path, input_name = filespec_split

                if input_name.startswith('/'):
                    input_path = PosixPath(input_name)
                else:
                    try:
                        input_path = inputs_outputs[input_name].path
                    except KeyError:
                        logging.critical("Invalid input file: %r", input_name)
                        sys.exit(1)

                temp = None

                if not local_path:
                    # Restore original file from pack
                    logging.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(
                        input_name, input_path, temp)
                    if local_path is None:
                        temp.remove()
                        logging.warning(
                            "No original packed, can't restore "
                            "input file %s", input_name)
                        continue
                else:
                    local_path = Path(local_path)
                    logging.debug("Uploading file %s to %s", local_path,
                                  input_path)
                    if not local_path.exists():
                        logging.critical("Local file %s doesn't exist",
                                         local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)

                if temp is not None:
                    temp.remove()
                    self.input_files.pop(input_name, None)
                else:
                    self.input_files[input_name] = local_path.absolute().path
        finally:
            self.finalize()
Exemple #21
0
def get_package_info(pack, read_data=False):
    """Get information about a package.
    """
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        total_size = 0
        total_paths = 0
        files = 0
        dirs = 0
        symlinks = 0
        hardlinks = 0
        others = 0

        rpz_pack = RPZPack(pack)
        for m in rpz_pack.list_data():
            total_size += m.size
            total_paths += 1
            if m.isfile():
                files += 1
            elif m.isdir():
                dirs += 1
            elif m.issym():
                symlinks += 1
            elif hasattr(m, 'islnk') and m.islnk():
                hardlinks += 1
            else:
                others += 1
        rpz_pack.close()

        information['pack'] = {
            'total_size': total_size,
            'total_paths': total_paths,
            'files': files,
            'dirs': dirs,
            'symlinks': symlinks,
            'hardlinks': hardlinks,
            'others': others,
        }

    total_paths = 0
    packed_packages_files = 0
    unpacked_packages_files = 0
    packed_packages = 0
    for package in packages:
        nb = len(package.files)
        total_paths += nb
        if package.packfiles:
            packed_packages_files += nb
            packed_packages += 1
        else:
            unpacked_packages_files += nb
    nb = len(other_files)
    total_paths += nb

    information['meta'] = {
        'total_paths': total_paths,
        'packed_packages_files': packed_packages_files,
        'unpacked_packages_files': unpacked_packages_files,
        'packages': len(packages),
        'packed_packages': packed_packages,
        'packed_paths': packed_packages_files + nb,
    }

    if runs:
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture for r in runs):
            logging.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution for r in runs):
            logging.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

        information['runs'] = [
            dict((k, run[k]) for k in [
                'id', 'binary', 'argv', 'environ', 'workingdir', 'signal',
                'exitcode'
            ] if k in run) for run in runs
        ]

    information['inputs_outputs'] = {
        name: {
            'path': str(iofile.path),
            'read_runs': iofile.read_runs,
            'write_runs': iofile.write_runs
        }
        for name, iofile in iteritems(inputs_outputs)
    }

    # Unpacker compatibility
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    information['unpacker_status'] = unpacker_status

    return information
Exemple #22
0
def _print_package_info(pack, info, verbosity=1):
    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))

    info_pack = info.get('pack')
    if info_pack:
        if 'total_size' in info_pack:
            print("Unpacked size: %s" % hsize(info_pack['total_size']))
        if 'total_paths' in info_pack:
            print("Total packed paths: %d" % info_pack['total_paths'])
        if verbosity >= 3:
            print("    Files: %d" % info_pack['files'])
            print("    Directories: %d" % info_pack['dirs'])
            if info_pack.get('symlinks'):
                print("    Symbolic links: %d" % info_pack['symlinks'])
            if info_pack.get('hardlinks'):
                print("    Hard links: %d" % info_pack['hardlinks'])
        if info_pack.get('others'):
            print("    Unknown (what!?): %d" % info_pack['others'])
    print("\n----- Metadata -----")
    info_meta = info['meta']
    if verbosity >= 3:
        print("Total paths: %d" % info_meta['total_paths'])
        print("Listed packed paths: %d" % info_meta['packed_paths'])
    if info_meta.get('packages'):
        print("Total software packages: %d" % info_meta['packages'])
        print("Packed software packages: %d" % info_meta['packed_packages'])
        if verbosity >= 3:
            print("Files from packed software packages: %d" %
                  info_meta['packed_packages_files'])
            print("Files from unpacked software packages: %d" %
                  info_meta['unpacked_packages_files'])
    if 'architecture' in info_meta:
        print("Architecture: %s (current: %s)" %
              (info_meta['architecture'], platform.machine().lower()))
    if 'distribution' in info_meta:
        distribution = ' '.join(t for t in info_meta['distribution'] if t)
        current_distribution = platform.linux_distribution()[0:2]
        current_distribution = ' '.join(t for t in current_distribution if t)
        print("Distribution: %s (current: %s)" %
              (distribution, current_distribution or "(not Linux)"))
    if 'runs' in info:
        runs = info['runs']
        print("Runs (%d):" % len(runs))
        for run in runs:
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) == 1 and run['id'] == "run0":
                print("    %s" % cmdline)
            else:
                print("    %s: %s" % (run['id'], cmdline))
            if verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])
                if run.get('walltime') is not None:
                    print("        walltime: %s" % run['walltime'])

    inputs_outputs = info.get('inputs_outputs')
    if inputs_outputs:
        if verbosity < 2:
            print("Inputs/outputs files (%d): %s" %
                  (len(inputs_outputs), ", ".join(sorted(inputs_outputs))))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in sorted(iteritems(inputs_outputs)):
                t = []
                if f['read_runs']:
                    t.append("in")
                if f['write_runs']:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f['path']))

    unpacker_status = info.get('unpacker_status')
    if unpacker_status:
        print("\n----- Unpackers -----")
        for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                     (COMPAT_NO, "Incompatible")]:
            if s != COMPAT_OK and verbosity < 2:
                continue
            if s not in unpacker_status:
                continue
            upks = unpacker_status[s]
            print("%s (%d):" % (n, len(upks)))
            for upk_name, msg in upks:
                if msg is not None:
                    print("    %s (%s)" % (upk_name, msg))
                else:
                    print("    %s" % upk_name)
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logger.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logger.debug("Running from image %s", image.decode('ascii'))
    else:
        logger.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p',
                             '%s:%s/%s' % (port_host, port_container, proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logger.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Passing arguments to Docker image:")
        for c in cmd:
            logger.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logger.info("Start container %s (detached)",
                    container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logger.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logger.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd,
                                 request_tty=True)

    # The image prints out the exit status(es) itself
    if retcode != 0:
        logger.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logger.info("Committing container %s to image %s",
                container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logger.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logger.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logger.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #24
0
    def run(self, files):
        reprounzip.common.record_usage(upload_files=len(files))
        input_files = dict(
            (n, f.path)
            for n, f in iteritems(self.get_config().inputs_outputs)
            if f.read_runs)

        # No argument: list all the input files and exit
        if not files:
            print("Input files:")
            for input_name in input_files:
                if self.input_files.get(input_name) is not None:
                    assigned = self.input_files[input_name]
                else:
                    assigned = "(original)"
                print("    %s: %s" % (input_name, assigned))
            return

        self.prepare_upload(files)

        try:
            # Upload files
            for filespec in files:
                filespec_split = filespec.rsplit(':', 1)
                if len(filespec_split) != 2:
                    logging.critical("Invalid file specification: %r",
                                     filespec)
                    sys.exit(1)
                local_path, input_name = filespec_split

                try:
                    input_path = input_files[input_name]
                except KeyError:
                    logging.critical("Invalid input file: %r", input_name)
                    sys.exit(1)

                temp = None

                if not local_path:
                    # Restore original file from pack
                    logging.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(input_name,
                                                             input_path,
                                                             temp)
                    if local_path is None:
                        temp.remove()
                        logging.warning("No original packed, can't restore "
                                        "input file %s", input_name)
                        continue
                else:
                    local_path = Path(local_path)
                    logging.debug("Uploading file %s to %s",
                                  local_path, input_path)
                    if not local_path.exists():
                        logging.critical("Local file %s doesn't exist",
                                         local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)

                if temp is not None:
                    temp.remove()
                    self.input_files.pop(input_name, None)
                else:
                    self.input_files[input_name] = local_path.absolute().path
        finally:
            self.finalize()
Exemple #25
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    def parse_run(runs, s):
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs) or
                    (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs) or
                    (show_outputs and file_filter.run in fio.write_runs))

    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml',
                                  canonical=True)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})

        if show_inputs:
            shown = False
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if not shown:
                        print("Input files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)

                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print("      %s" % assigned)
            if not shown:
                print("Input files: none")

        if show_outputs:
            shown = False
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if not shown:
                        print("Output files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
            if not shown:
                print("Output files: none")

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        if any(f.read_runs for f in itervalues(config.inputs_outputs)):
            print("Input files:")
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)
        else:
            print("Input files: none")

        if any(f.write_runs for f in itervalues(config.inputs_outputs)):
            print("Output files:")
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
        else:
            print("Output files: none")
Exemple #26
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #27
0
def get_package_info(pack, read_data=False):
    """Get information about a package.
    """
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        total_size = 0
        total_paths = 0
        files = 0
        dirs = 0
        symlinks = 0
        hardlinks = 0
        others = 0

        rpz_pack = RPZPack(pack)
        for m in rpz_pack.list_data():
            total_size += m.size
            total_paths += 1
            if m.isfile():
                files += 1
            elif m.isdir():
                dirs += 1
            elif m.issym():
                symlinks += 1
            elif hasattr(m, 'islnk') and m.islnk():
                hardlinks += 1
            else:
                others += 1
        rpz_pack.close()

        information['pack'] = {
            'total_size': total_size,
            'total_paths': total_paths,
            'files': files,
            'dirs': dirs,
            'symlinks': symlinks,
            'hardlinks': hardlinks,
            'others': others,
        }

    total_paths = 0
    packed_packages_files = 0
    unpacked_packages_files = 0
    packed_packages = 0
    for package in packages:
        nb = len(package.files)
        total_paths += nb
        if package.packfiles:
            packed_packages_files += nb
            packed_packages += 1
        else:
            unpacked_packages_files += nb
    nb = len(other_files)
    total_paths += nb

    information['meta'] = {
        'total_paths': total_paths,
        'packed_packages_files': packed_packages_files,
        'unpacked_packages_files': unpacked_packages_files,
        'packages': len(packages),
        'packed_packages': packed_packages,
        'packed_paths': packed_packages_files + nb,
    }

    if runs:
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture
               for r in runs):
            logger.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution
               for r in runs):
            logger.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

        information['runs'] = [
            dict((k, run[k])
                 for k in ['id', 'binary', 'argv', 'environ',
                           'workingdir', 'signal', 'exitcode']
                 if k in run)
            for run in runs]

    information['inputs_outputs'] = {
        name: {'path': str(iofile.path),
               'read_runs': iofile.read_runs,
               'write_runs': iofile.write_runs}
        for name, iofile in iteritems(inputs_outputs)}

    # Unpacker compatibility
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    information['unpacker_status'] = unpacker_status

    return information
Exemple #28
0
    def run(self, files, all_):
        reprounzip.common.record_usage(download_files=len(files))
        output_files = {n: f.path
                        for n, f in iteritems(self.get_config().inputs_outputs)
                        if f.write_runs}

        # No argument: list all the output files and exit
        if not (all_ or files):
            print("Output files:")
            for output_name in output_files:
                print("    %s" % output_name)
            return

        # Parse the name[:path] syntax
        resolved_files = []
        all_files = set(output_files)
        for filespec in files:
            filespec_split = filespec.split(':', 1)
            if len(filespec_split) == 1:
                output_name = local_path = filespec
            elif len(filespec_split) == 2:
                output_name, local_path = filespec_split
            else:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            local_path = Path(local_path) if local_path else None
            all_files.discard(output_name)
            resolved_files.append((output_name, local_path))

        # If all_ is set, add all the files that weren't explicitely named
        if all_:
            for output_name in all_files:
                resolved_files.append((output_name, Path(output_name)))

        self.prepare_download(resolved_files)

        success = True
        try:
            # Download files
            for output_name, local_path in resolved_files:
                try:
                    remote_path = output_files[output_name]
                except KeyError:
                    logging.critical("Invalid output file: %r", output_name)
                    sys.exit(1)

                logging.debug("Downloading file %s", remote_path)
                if local_path is None:
                    ret = self.download_and_print(remote_path)
                else:
                    ret = self.download(remote_path, local_path)
                if ret is None:
                    ret = True
                    warnings.warn("download() returned None instead of "
                                  "True/False, assuming True",
                                  category=DeprecationWarning)
                if not ret:
                    success = False
            if not success:
                sys.exit(1)
        finally:
            self.finalize()
Exemple #29
0
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
            '''
            SELECT id, parent, timestamp
            FROM processes
            ORDER BY id
            ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
            '''
            SELECT name, timestamp, mode, process
            FROM opened_files
            ORDER BY id
            ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
            '''
            SELECT name, timestamp, process, argv
            FROM executed_files
            ORDER BY id
            ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Exemple #30
0
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks,
                                     has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    replace = [lambda path, r=re.compile(p): r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Exemple #31
0
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    def parse_run(runs, s):
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs)
                    or (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs)
                    or (show_outputs and file_filter.run in fio.write_runs))

    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml', canonical=True)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})

        if show_inputs:
            shown = False
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if not shown:
                        print("Input files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)

                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print("      %s" % assigned)
            if not shown:
                print("Input files: none")

        if show_outputs:
            shown = False
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if not shown:
                        print("Output files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
            if not shown:
                print("Output files: none")

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        if any(f.read_runs for f in itervalues(config.inputs_outputs)):
            print("Input files:")
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)
        else:
            print("Input files: none")

        if any(f.write_runs for f in itervalues(config.inputs_outputs)):
            print("Output files:")
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
        else:
            print("Output files: none")
Exemple #32
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemple #33
0
def directory_run(args):
    """Runs the command in the directory.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.communicate()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
                shell_escape(unicode_(join_root(root, d)))
                for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root,
                               Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
Exemple #34
0
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)

    has_thread_flag = config.format_version >= LooseVersion('0.7')

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})

    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)

            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path, (bool(f.write_runs),
                                         bool(f.read_runs)))
                          for n, f in iteritems(config.inputs_outputs))

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  '  <vertices>\n')

        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write('    <vertex>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in iteritems(tags)))
            if vertex:
                out.write('\n      <attributes>\n')
                for k, v in iteritems(vertex):
                    out.write('        <attribute>\n'
                              '          <name>{k}</name>\n'
                              '          <value>{v}</value>\n'
                              '        </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write('      </attributes>')
            out.write('\n    </vertex>\n')
        out.write('  </vertices>\n'
                  '  <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write('    <edge>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in iteritems(edge)) +
                      '\n    </edge>\n')
        out.write('  </edges>\n'
                  '</provenancedata>\n')

    conn.close()
Exemple #35
0
    def run(self, files, all_):
        reprounzip.common.record_usage(download_files=len(files))
        inputs_outputs = self.get_config().inputs_outputs

        # No argument: list all the output files and exit
        if not (all_ or files):
            print("Output files:")
            for output_name in sorted(n for n, f in iteritems(inputs_outputs)
                                      if f.write_runs):
                print("    %s" % output_name)
            return

        # Parse the name[:path] syntax
        resolved_files = []
        all_files = set(n for n, f in iteritems(inputs_outputs)
                        if f.write_runs)
        for filespec in files:
            filespec_split = filespec.split(':', 1)
            if len(filespec_split) == 1:
                output_name = local_path = filespec
            elif len(filespec_split) == 2:
                output_name, local_path = filespec_split
            else:
                logging.critical("Invalid file specification: %r", filespec)
                sys.exit(1)
            local_path = Path(local_path) if local_path else None
            all_files.discard(output_name)
            resolved_files.append((output_name, local_path))

        # If all_ is set, add all the files that weren't explicitely named
        if all_:
            for output_name in all_files:
                resolved_files.append((output_name, Path(output_name)))

        self.prepare_download(resolved_files)

        success = True
        try:
            # Download files
            for output_name, local_path in resolved_files:
                if output_name.startswith('/'):
                    remote_path = PosixPath(output_name)
                else:
                    try:
                        remote_path = inputs_outputs[output_name].path
                    except KeyError:
                        logging.critical("Invalid output file: %r",
                                         output_name)
                        sys.exit(1)

                logging.debug("Downloading file %s", remote_path)
                if local_path is None:
                    ret = self.download_and_print(remote_path)
                else:
                    ret = self.download(remote_path, local_path)
                if ret is None:
                    ret = True
                    warnings.warn(
                        "download() returned None instead of "
                        "True/False, assuming True",
                        category=DeprecationWarning)
                if not ret:
                    success = False
            if not success:
                sys.exit(1)
        finally:
            self.finalize()
Exemple #36
0
def functional_tests(raise_warnings, interactive, run_vagrant, run_docker):
    # Tests on Python < 2.7.3: need to use separate reprozip Python (with known
    # working version of Python)
    if sys.version_info < (2, 7, 3):
        bug13676 = True
        if 'REPROZIP_PYTHON' not in os.environ:
            sys.stderr.write("Error: using reprozip with Python %s!\n" %
                             sys.version.split(' ', 1)[0])
            sys.exit(1)
    else:
        bug13676 = False

    rpz = [os.environ.get('REPROZIP_PYTHON', sys.executable)]
    rpuz = [os.environ.get('REPROUNZIP_PYTHON', sys.executable)]

    # Can't match on the SignalWarning category here because of a Python bug
    # http://bugs.python.org/issue22543
    if raise_warnings:
        rpz.extend(['-W', 'error:signal'])
        rpuz.extend(['-W', 'error:signal'])

    if 'COVER' in os.environ:
        rpz.extend(['-m'] + os.environ['COVER'].split(' '))
        rpuz.extend(['-m'] + os.environ['COVER'].split(' '))

    reprozip_main = tests.parent / 'reprozip/reprozip/main.py'
    reprounzip_main = tests.parent / 'reprounzip/reprounzip/main.py'

    verbose = ['-v'] * 3
    rpz.extend([reprozip_main.absolute().path] + verbose)
    rpuz.extend([reprounzip_main.absolute().path] + verbose)

    print("Command lines are:\n%r\n%r" % (rpz, rpuz))

    # ########################################
    # testrun /bin/echo
    #

    output = check_output(rpz + ['testrun', '/bin/echo', 'outputhere'])
    assert any(b' 1 | /bin/echo outputhere ' in l for l in output.splitlines())

    output = check_output(
        rpz + ['testrun', '-a', '/fake/path/echo', '/bin/echo', 'outputhere'])
    assert any(b' 1 | (/bin/echo) /fake/path/echo outputhere ' in l
               for l in output.splitlines())

    # ########################################
    # testrun multiple commands
    #

    check_call(rpz + [
        'testrun', 'bash', '-c', 'cat ../../../../../etc/passwd;'
        'cd /var/lib;'
        'cat ../../etc/group'
    ])
    check_call(rpz + ['trace', 'bash', '-c', 'cat /etc/passwd;echo'])
    check_call(
        rpz +
        ['trace', '--continue', 'sh', '-c', 'cat /etc/group;/usr/bin/id'])
    check_call(rpz + ['pack'])
    if not bug13676:
        check_call(rpuz + ['graph', 'graph.dot'])
        check_call(rpuz + ['graph', 'graph2.dot', 'experiment.rpz'])

    sudo = ['sudo', '-E']  # -E to keep REPROZIP_USAGE_STATS

    # ########################################
    # 'simple' program: trace, pack, info, unpack
    #

    # Build
    build('simple', ['simple.c'])
    # Trace
    check_call(rpz + [
        'trace', '-d', 'rpz-simple', './simple',
        (tests / 'simple_input.txt').path, 'simple_output.txt'
    ])
    orig_output_location = Path('simple_output.txt').absolute()
    assert orig_output_location.is_file()
    with orig_output_location.open(encoding='utf-8') as fp:
        assert fp.read().strip() == '42'
    orig_output_location.remove()
    # Read config
    with Path('rpz-simple/config.yml').open(encoding='utf-8') as fp:
        conf = yaml.safe_load(fp)
    other_files = set(Path(f).absolute() for f in conf['other_files'])
    expected = [Path('simple'), (tests / 'simple_input.txt')]
    assert other_files.issuperset([f.resolve() for f in expected])
    # Check input and output files
    input_files = conf['runs'][0]['input_files']
    assert (dict((k, Path(f).name) for k, f in iteritems(input_files)) == {
        'arg': b'simple_input.txt'
    })
    output_files = conf['runs'][0]['output_files']
    print(dict((k, Path(f).name) for k, f in iteritems(output_files)))
    # Here we don't test for dict equality, since we might have C coverage
    # files in the mix
    assert Path(output_files['arg']).name == b'simple_output.txt'
    # Pack
    check_call(rpz + ['pack', '-d', 'rpz-simple', 'simple.rpz'])
    Path('simple').remove()
    # Info
    check_call(rpuz + ['info', 'simple.rpz'])
    # Show files
    check_call(rpuz + ['showfiles', 'simple.rpz'])
    # Lists packages
    check_call(rpuz + ['installpkgs', '--summary', 'simple.rpz'])
    # Unpack directory
    check_call(rpuz + ['directory', 'setup', 'simple.rpz', 'simpledir'])
    # Run directory
    check_call(rpuz + ['directory', 'run', 'simpledir'])
    output_in_dir = join_root(Path('simpledir/root'), orig_output_location)
    with output_in_dir.open(encoding='utf-8') as fp:
        assert fp.read().strip() == '42'
    # Delete with wrong command (should fail)
    p = subprocess.Popen(rpuz + ['chroot', 'destroy', 'simpledir'],
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    assert p.poll() != 0
    stderr = stderr.splitlines()
    assert b"Wrong unpacker used" in stderr[0]
    assert stderr[1].startswith(b"usage: ")
    # Delete directory
    check_call(rpuz + ['directory', 'destroy', 'simpledir'])
    # Unpack chroot
    check_call(
        sudo + rpuz +
        ['chroot', 'setup', '--bind-magic-dirs', 'simple.rpz', 'simplechroot'])
    try:
        # Run chroot
        check_call(sudo + rpuz + ['chroot', 'run', 'simplechroot'])
        output_in_chroot = join_root(Path('simplechroot/root'),
                                     orig_output_location)
        with output_in_chroot.open(encoding='utf-8') as fp:
            assert fp.read().strip() == '42'
        # Get output file
        check_call(sudo + rpuz +
                   ['chroot', 'download', 'simplechroot', 'arg:output1.txt'])
        with Path('output1.txt').open(encoding='utf-8') as fp:
            assert fp.read().strip() == '42'
        # Replace input file
        check_call(sudo + rpuz + [
            'chroot', 'upload', 'simplechroot',
            '%s:arg' % (tests / 'simple_input2.txt')
        ])
        check_call(sudo + rpuz + ['chroot', 'upload', 'simplechroot'])
        # Run again
        check_call(sudo + rpuz + ['chroot', 'run', 'simplechroot'])
        output_in_chroot = join_root(Path('simplechroot/root'),
                                     orig_output_location)
        with output_in_chroot.open(encoding='utf-8') as fp:
            assert fp.read().strip() == '36'
        # Delete with wrong command (should fail)
        p = subprocess.Popen(rpuz + ['directory', 'destroy', 'simplechroot'],
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        assert p.poll() != 0
        stderr = stderr.splitlines()
        assert b"Wrong unpacker used" in stderr[0]
        assert stderr[1].startswith(b"usage:")
    finally:
        # Delete chroot
        check_call(sudo + rpuz + ['chroot', 'destroy', 'simplechroot'])

    if not (tests / 'vagrant').exists():
        check_call([
            'sudo', 'sh', '-c',
            'mkdir %(d)s; chmod 777 %(d)s' % {
                'd': tests / 'vagrant'
            }
        ])

    # Unpack Vagrant-chroot
    check_call(rpuz + [
        'vagrant', 'setup/create', '--use-chroot', 'simple.rpz',
        (tests / 'vagrant/simplevagrantchroot').path
    ])
    print("\nVagrant project set up in simplevagrantchroot")
    try:
        if run_vagrant:
            check_call(rpuz + [
                'vagrant', 'run', '--no-stdin',
                (tests / 'vagrant/simplevagrantchroot').path
            ])
            # Destroy
            check_call(rpuz + [
                'vagrant', 'destroy', (tests /
                                       'vagrant/simplevagrantchroot').path
            ])
        elif interactive:
            print("Test and press enter")
            sys.stdin.readline()
    finally:
        if (tests / 'vagrant/simplevagrantchroot').exists():
            (tests / 'vagrant/simplevagrantchroot').rmtree()
    # Unpack Vagrant without chroot
    check_call(rpuz + [
        'vagrant', 'setup/create', '--dont-use-chroot', 'simple.rpz',
        (tests / 'vagrant/simplevagrant').path
    ])
    print("\nVagrant project set up in simplevagrant")
    try:
        if run_vagrant:
            check_call(rpuz + [
                'vagrant', 'run', '--no-stdin',
                (tests / 'vagrant/simplevagrant').path
            ])
            # Destroy
            check_call(
                rpuz +
                ['vagrant', 'destroy', (tests / 'vagrant/simplevagrant').path])
        elif interactive:
            print("Test and press enter")
            sys.stdin.readline()
    finally:
        if (tests / 'vagrant/simplevagrant').exists():
            (tests / 'vagrant/simplevagrant').rmtree()

    # Unpack Docker
    check_call(rpuz + ['docker', 'setup/create', 'simple.rpz', 'simpledocker'])
    print("\nDocker project set up in simpledocker")
    try:
        if run_docker:
            check_call(rpuz + ['docker', 'setup/build', 'simpledocker'])
            check_call(rpuz + ['docker', 'run', 'simpledocker'])
            # Get output file
            check_call(
                rpuz +
                ['docker', 'download', 'simpledocker', 'arg:doutput1.txt'])
            with Path('doutput1.txt').open(encoding='utf-8') as fp:
                assert fp.read().strip() == '42'
            # Replace input file
            check_call(rpuz + [
                'docker', 'upload', 'simpledocker',
                '%s:arg' % (tests / 'simple_input2.txt')
            ])
            check_call(rpuz + ['docker', 'upload', 'simpledocker'])
            check_call(rpuz + ['showfiles', 'simpledocker'])
            # Run again
            check_call(rpuz + ['docker', 'run', 'simpledocker'])
            # Get output file
            check_call(
                rpuz +
                ['docker', 'download', 'simpledocker', 'arg:doutput2.txt'])
            with Path('doutput2.txt').open(encoding='utf-8') as fp:
                assert fp.read().strip() == '36'
            # Destroy
            check_call(rpuz + ['docker', 'destroy', 'simpledocker'])
        elif interactive:
            print("Test and press enter")
            sys.stdin.readline()
    finally:
        if Path('simpledocker').exists():
            Path('simpledocker').rmtree()

    # ########################################
    # 'threads' program: testrun
    #

    # Build
    build('threads', ['threads.c'], ['-lpthread'])
    # Trace
    check_call(rpz + ['testrun', './threads'])

    # ########################################
    # 'segv' program: testrun
    #

    # Build
    build('segv', ['segv.c'])
    # Trace
    check_call(rpz + ['testrun', './segv'])

    # ########################################
    # 'exec_echo' program: trace, pack, run --cmdline
    #

    # Build
    build('exec_echo', ['exec_echo.c'])
    # Trace
    check_call(rpz + ['trace', './exec_echo', 'originalexecechooutput'])
    # Pack
    check_call(rpz + ['pack', 'exec_echo.rpz'])
    # Unpack chroot
    check_call(sudo + rpuz +
               ['chroot', 'setup', 'exec_echo.rpz', 'echochroot'])
    try:
        # Run original command-line
        output = check_output(sudo + rpuz + ['chroot', 'run', 'echochroot'])
        assert output == b'originalexecechooutput\n'
        # Prints out command-line
        output = check_output(sudo + rpuz +
                              ['chroot', 'run', 'echochroot', '--cmdline'])
        assert any(b'./exec_echo originalexecechooutput' == s.strip()
                   for s in output.split(b'\n'))
        # Run with different command-line
        output = check_output(sudo + rpuz + [
            'chroot', 'run', 'echochroot', '--cmdline', './exec_echo',
            'changedexecechooutput'
        ])
        assert output == b'changedexecechooutput\n'
    finally:
        check_call(sudo + rpuz + ['chroot', 'destroy', 'echochroot'])

    # ########################################
    # 'exec_echo' program: testrun
    # This is built with -m32 so that we transition:
    #   python (x64) -> exec_echo (i386) -> echo (x64)
    #

    if sys.maxsize > 2**32:
        # Build
        build('exec_echo32', ['exec_echo.c'], ['-m32'])
        # Trace
        check_call(rpz + ['testrun', './exec_echo32 42'])
    else:
        print("Can't try exec_echo transitions: not running on 64bits")

    # ########################################
    # Tracing non-existing program
    #

    check_call(rpz + ['testrun', './doesntexist'])

    # ########################################
    # 'connect' program: testrun
    #

    # Build
    build('connect', ['connect.c'])
    # Trace
    stderr = check_errout(rpz + ['testrun', './connect'])
    stderr = stderr.split(b'\n')
    assert not any(b'program exited with non-zero code' in l for l in stderr)
    assert any(
        re.search(br'process connected to [0-9.]+:80', l) for l in stderr)

    # ########################################
    # Copies back coverage report
    #

    coverage = Path('.coverage')
    if coverage.exists():
        coverage.copyfile(tests.parent / '.coverage.runpy')
Exemple #37
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Get the local bridge IP
    ip_str = get_iface_addr('docker0')

    # X11 handler
    x11 = X11Handler(args.x11, ('internet', ip_str), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' /bin/busybox sh -c %s\n' % (uid,
                                                           shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(
                LocalForwarder(connector, port))

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t', image,
                                  '/bin/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #38
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict(
            (host, (guest, proto)) for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = [
            'chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
            for c in x11.init_cmds
        ] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin
                       or os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #39
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p',
                             '%s:%s%s' % (port_host, port_container, proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug("Passing arguments to Docker image:")
        for c in cmd:
            logging.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd)
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(args.docker_cmd.split() +
                                  ['inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #40
0
def _print_package_info(pack, info, verbosity=1):
    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))

    info_pack = info.get('pack')
    if info_pack:
        if 'total_size' in info_pack:
            print("Unpacked size: %s" % hsize(info_pack['total_size']))
        if 'total_paths' in info_pack:
            print("Total packed paths: %d" % info_pack['total_paths'])
        if verbosity >= 3:
            print("    Files: %d" % info_pack['files'])
            print("    Directories: %d" % info_pack['dirs'])
            if info_pack.get('symlinks'):
                print("    Symbolic links: %d" % info_pack['symlinks'])
            if info_pack.get('hardlinks'):
                print("    Hard links: %d" % info_pack['hardlinks'])
        if info_pack.get('others'):
            print("    Unknown (what!?): %d" % info_pack['others'])
    print("\n----- Metadata -----")
    info_meta = info['meta']
    if verbosity >= 3:
        print("Total paths: %d" % info_meta['total_paths'])
        print("Listed packed paths: %d" % info_meta['packed_paths'])
    if info_meta.get('packages'):
        print("Total software packages: %d" % info_meta['packages'])
        print("Packed software packages: %d" % info_meta['packed_packages'])
        if verbosity >= 3:
            print("Files from packed software packages: %d" %
                  info_meta['packed_packages_files'])
            print("Files from unpacked software packages: %d" %
                  info_meta['unpacked_packages_files'])
    if 'architecture' in info_meta:
        print("Architecture: %s (current: %s)" % (info_meta['architecture'],
                                                  platform.machine().lower()))
    if 'distribution' in info_meta:
        distribution = ' '.join(t for t in info_meta['distribution'] if t)
        current_distribution = [distro.id(), distro.version()]
        current_distribution = ' '.join(t for t in current_distribution if t)
        print("Distribution: %s (current: %s)" % (
              distribution, current_distribution or "(not Linux)"))
    if 'runs' in info:
        runs = info['runs']
        print("Runs (%d):" % len(runs))
        for run in runs:
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) == 1 and run['id'] == "run0":
                print("    %s" % cmdline)
            else:
                print("    %s: %s" % (run['id'], cmdline))
            if verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])
                if run.get('walltime') is not None:
                    print("        walltime: %s" % run['walltime'])

    inputs_outputs = info.get('inputs_outputs')
    if inputs_outputs:
        if verbosity < 2:
            print("Inputs/outputs files (%d): %s" % (
                  len(inputs_outputs), ", ".join(sorted(inputs_outputs))))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in sorted(iteritems(inputs_outputs)):
                t = []
                if f['read_runs']:
                    t.append("in")
                if f['write_runs']:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f['path']))

    unpacker_status = info.get('unpacker_status')
    if unpacker_status:
        print("\n----- Unpackers -----")
        for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                     (COMPAT_NO, "Incompatible")]:
            if s != COMPAT_OK and verbosity < 2:
                continue
            if s not in unpacker_status:
                continue
            upks = unpacker_status[s]
            print("%s (%d):" % (n, len(upks)))
            for upk_name, msg in upks:
                if msg is not None:
                    print("    %s (%s)" % (upk_name, msg))
                else:
                    print("    %s" % upk_name)
Exemple #41
0
def do_vistrails(target, pack=None, **kwargs):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)

    config = load_config(target / 'config.yml', canonical=True)

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    ids = IdScope()
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            wf = Workflow(fp, ids)

            # Directory module, refering to this directory
            d = wf.add_module('%s:Directory' % rpz_id, rpz_version)
            wf.add_function(d, 'directory',
                            [(directory_sig, str(target.resolve()))])

            connect_from = d

            for i, run in enumerate(config.runs):
                inputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                if i in f.read_runs)
                outputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                 if i in f.write_runs)
                ports = itertools.chain((('input', p) for p in inputs),
                                        (('output', p) for p in outputs))

                # Run module
                r = wf.add_module('%s:Run' % rpz_id, rpz_version)
                wf.add_function(r, 'cmdline', [
                                (string_sig,
                                 ' '.join(shell_escape(arg)
                                          for arg in run['argv']))])
                wf.add_function(r, 'run_number', [(integer_sig, i)])

                # Port specs for input/output files
                for type_, name in ports:
                    wf.add_port_spec(r, name, type_, [file_pkg_mod])

                # Draw connection
                wf.connect(connect_from, experiment_sig, 'experiment',
                           r, experiment_sig, 'experiment')
                connect_from = r

            wf.close()

        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
Exemple #42
0
def generate(target,
             configfile,
             database,
             all_forks=False,
             graph_format='dot',
             level_pkgs='file',
             level_processes='thread',
             level_other_files='all',
             regex_filters=None,
             regex_replaces=None,
             aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {
            'dot': FORMAT_DOT,
            'DOT': FORMAT_DOT,
            'json': FORMAT_JSON,
            'JSON': FORMAT_JSON
        }[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict(
        (f.path, n) for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [
        lambda path, r=re.compile(p): r.search(path) is not None
        for p in regex_filters or []
    ]
    replace = [
        lambda path, r=re.compile(p): r.sub(repl, path)
        for p, repl in regex_replaces or []
    ]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict(
            (f.path, pkg) for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(
            PosixPath(*f.components[:file_depth + 1]) for f in other_files)
        edges = OrderedSet((prog, f if f in package_map else PosixPath(
            *f.components[:file_depth + 1]), mode, argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Exemple #43
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info.get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target, unpacked_info['use_chroot'])

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #44
0
class X11Handler(object):
    """X11 handler.

    This selects a way to connect to the local X server and an authentication
    mechanism. If provides `fix_env()` to set the X environment variable for
    the experiment, `init_cmds` to setup X before running the experiment's main
    commands, and `port_forward` which describes the reverse port tunnels from
    the experiment to the local X server.
    """
    DISPLAY_NUMBER = 15

    SOCK2X = {
        socket.AF_INET: Xauth.FAMILY_INTERNET,
        socket.AF_INET6: Xauth.FAMILY_INTERNET6
    }
    X2SOCK = dict((v, k) for k, v in iteritems(SOCK2X))

    def __init__(self, enabled, target, display=None):
        self.enabled = enabled
        if not self.enabled:
            return

        self.target = target

        self.xauth = PosixPath('/.reprounzip_xauthority')
        self.display = display if display is not None else self.DISPLAY_NUMBER
        logging.debug(
            "X11 support enabled; will create Xauthority file %s "
            "for experiment. Display number is %d", self.xauth, self.display)

        # List of addresses that match the $DISPLAY variable
        possible, local_display = self._locate_display()
        tcp_portnum = ((6000 +
                        local_display) if local_display is not None else None)

        if ('XAUTHORITY' in os.environ
                and Path(os.environ['XAUTHORITY']).is_file()):
            xauthority = Path(os.environ['XAUTHORITY'])
        # Note: I'm assuming here that Xauthority has no XDG support
        else:
            xauthority = Path('~').expand_user() / '.Xauthority'

        # Read Xauthority file
        xauth_entries = {}
        if xauthority.is_file():
            with xauthority.open('rb') as fp:
                fp.seek(0, os.SEEK_END)
                size = fp.tell()
                fp.seek(0, os.SEEK_SET)
                while fp.tell() < size:
                    entry = Xauth.from_file(fp)
                    if (entry.name == 'MIT-MAGIC-COOKIE-1'
                            and entry.number == local_display):
                        if entry.family == Xauth.FAMILY_LOCAL:
                            xauth_entries[(entry.family, None)] = entry
                        elif (entry.family == Xauth.FAMILY_INTERNET
                              or entry.family == Xauth.FAMILY_INTERNET6):
                            xauth_entries[(entry.family,
                                           entry.address)] = entry
        # FIXME: this completely ignores addresses

        logging.debug("Possible X endpoints: %s", (possible, ))

        # Select socket and authentication cookie
        self.xauth_record = None
        self.connection_info = None
        for family, address in possible:
            # Checks that we have a cookie
            entry = family, (None if family is Xauth.FAMILY_LOCAL else address)
            if entry not in xauth_entries:
                continue
            if family == Xauth.FAMILY_LOCAL and hasattr(socket, 'AF_UNIX'):
                # Checks that the socket exists
                if not Path(address).exists():
                    continue
                self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM,
                                        address)
                self.xauth_record = xauth_entries[(family, None)]
                logging.debug(
                    "Will connect to local X display via UNIX "
                    "socket %s", address)
                break
            else:
                # Checks that we have a cookie
                family = self.X2SOCK[family]
                self.connection_info = (family, socket.SOCK_STREAM,
                                        (address, tcp_portnum))
                self.xauth_record = xauth_entries[(family, address)]
                logging.debug("Will connect to X display %s:%d via %s/TCP",
                              address, tcp_portnum,
                              "IPv6" if family == socket.AF_INET6 else "IPv4")
                break

        # Didn't find an Xauthority record -- assume no authentication is
        # needed, but still set self.connection_info
        if self.connection_info is None:
            for family, address in possible:
                # Only try UNIX sockets, we'll use 127.0.0.1 otherwise
                if family == Xauth.FAMILY_LOCAL:
                    if not hasattr(socket, 'AF_UNIX'):
                        continue
                    self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM,
                                            address)
                    logging.debug(
                        "Will connect to X display via UNIX socket "
                        "%s, no authentication", address)
                    break
            else:
                self.connection_info = (socket.AF_INET, socket.SOCK_STREAM,
                                        ('127.0.0.1', tcp_portnum))
                logging.debug(
                    "Will connect to X display 127.0.0.1:%d via IPv4/TCP, "
                    "no authentication", tcp_portnum)

        if self.connection_info is None:
            raise RuntimeError("Couldn't determine how to connect to local X "
                               "server, DISPLAY is %s" %
                               (repr(os.environ['DISPLAY'])
                                if 'DISPLAY' is os.environ else 'not set'))

    @classmethod
    def _locate_display(cls):
        """Reads $DISPLAY and figures out possible sockets.
        """
        # We default to ":0", Xming for instance doesn't set $DISPLAY
        display = os.environ.get('DISPLAY', ':0')

        # It might be the full path to a UNIX socket
        if display.startswith('/'):
            return [(Xauth.FAMILY_LOCAL, display)], None

        local_addr, local_display = display.rsplit(':', 1)
        local_display = int(local_display.split('.', 1)[0])

        # Let's order the socket families: IPv4 first, then v6, then others
        def sort_families(gai, order={socket.AF_INET: 0, socket.AF_INET6: 1}):
            return sorted(gai, key=lambda x: order.get(x[0], 999999))

        # Network addresses of the local machine
        local_addresses = []
        for family, socktype, proto, canonname, sockaddr in \
                sort_families(socket.getaddrinfo(socket.gethostname(), 6000)):
            try:
                family = cls.SOCK2X[family]
            except KeyError:
                continue
            local_addresses.append((family, sockaddr[0]))

        logging.debug("Local addresses: %s", (local_addresses, ))

        # Determine possible addresses for $DISPLAY
        if not local_addr:
            possible = [(Xauth.FAMILY_LOCAL,
                         '/tmp/.X11-unix/X%d' % local_display)]
            possible += local_addresses
        else:
            local_possible = False
            possible = []
            for family, socktype, proto, canonname, sockaddr in \
                    sort_families(socket.getaddrinfo(local_addr, 6000)):
                try:
                    family = cls.SOCK2X[family]
                except KeyError:
                    continue
                if (family, sockaddr[0]) in local_addresses:
                    local_possible = True
                possible.append((family, sockaddr[0]))
            if local_possible:
                possible = [
                    (Xauth.FAMILY_LOCAL, '/tmp/.X11-unix/X%d' % local_display)
                ] + possible

        return possible, local_display

    @property
    def port_forward(self):
        """Builds the port forwarding info, for `run_interactive()`.

        Just requests port 6015 on the remote host to be forwarded to the X
        socket identified by `self.connection_info`.
        """
        if not self.enabled:
            return []

        @contextlib.contextmanager
        def connect(src_addr):
            logging.info("Got remote X connection from %s", (src_addr, ))
            logging.debug("Connecting to X server: %s",
                          (self.connection_info, ))
            sock = socket.socket(*self.connection_info[:2])
            sock.connect(self.connection_info[2])
            yield sock
            sock.close()
            logging.info("X connection from %s closed", (src_addr, ))

        return [(6000 + self.display, connect)]

    def fix_env(self, env):
        """Sets ``$XAUTHORITY`` and ``$DISPLAY`` in the environment.
        """
        if not self.enabled:
            return env
        new_env = dict(env)
        new_env['XAUTHORITY'] = str(self.xauth)
        if self.target[0] == 'local':
            new_env['DISPLAY'] = '127.0.0.1:%d' % self.display
        elif self.target[0] == 'internet':
            new_env['DISPLAY'] = '%s:%d' % (self.target[1], self.display)
        return new_env

    @property
    def init_cmds(self):
        """Gets the commands to setup X on the server before the experiment.
        """
        if not self.enabled or self.xauth_record is None:
            return []

        if self.target[0] == 'local':
            xauth_record = Xauth(Xauth.FAMILY_LOCAL, self.target[1],
                                 self.display, self.xauth_record.name,
                                 self.xauth_record.data)
        elif self.target[0] == 'internet':
            xauth_record = Xauth(Xauth.FAMILY_INTERNET,
                                 socket.inet_aton(self.target[1]),
                                 self.display, self.xauth_record.name,
                                 self.xauth_record.data)
        else:
            raise RuntimeError("Invalid target display type")
        buf = xauth_record.as_bytes()
        xauth = ''.join(
            ('\\x%02x' % ord(buf[i:i + 1])) for i in xrange(len(buf)))
        return ['echo -ne "%s" > %s' % (xauth, self.xauth)]
Exemple #45
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost'
                    and not docker_host.startswith('127.')
                    and not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join('-R*:%(p)d:127.0.0.1:%(p)d' %
                                       {'p': port}
                                       for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call([
            'docker', 'run', b'--name=' + container, '-h', hostname, '-d', '-t'
        ] + args.docker_option + [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(
        ['docker', 'run', b'--name=' + container, '-h', hostname, '-i', '-t'] +
        args.docker_option + [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False
            or outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #46
0
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        record_usage(chroot_missing_pkgs=True)
        logging.warning("According to configuration, some files were left out "
                        "because they belong to the following packages:%s"
                        "\nWill copy files from HOST SYSTEM",
                        ''.join('\n    %s' % pkg
                                for pkg in packages_not_packed))
        missing_files = False
        for pkg in packages_not_packed:
            for f in pkg.files:
                path = Path(f.path)
                if not path.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        path, pkg.name)
                    missing_files = True
                    continue
                dest = join_root(root, path)
                dest.parent.mkdir(parents=True)
                if path.is_link():
                    dest.symlink(path.read_link())
                else:
                    path.copy(dest)
                if restore_owner:
                    stat = path.stat()
                    dest.chown(stat.st_uid, stat.st_gid)
        if missing_files:
            record_usage(chroot_mising_files=True)

    # Unpacks files
    members = rpz_pack.list_data()
    for m in members:
        # Remove 'DATA/' prefix
        m.name = str(rpz_pack.remove_data_prefix(m.name))
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    rpz_pack.extract_data(root, members)
    rpz_pack.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(config.runs[0]['architecture']),
                          busybox_path,
                          'busybox-%s' % config.runs[0]['architecture'])
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    input_files = [f.path for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs]
    if input_files:
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for ifile in input_files:
            inputtar.add(str(join_root(root, ifile)),
                         str(ifile))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target, pack=pack)
Exemple #47
0
def print_info(args):
    """Writes out some information about a pack file.
    """
    pack = Path(args.pack[0])

    # Loads config
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    pack_total_size = 0
    pack_total_paths = 0
    pack_files = 0
    pack_dirs = 0
    pack_symlinks = 0
    pack_others = 0
    rpz_pack = RPZPack(pack)
    for m in rpz_pack.list_data():
        pack_total_size += m.size
        pack_total_paths += 1
        if m.isfile():
            pack_files += 1
        elif m.isdir():
            pack_dirs += 1
        elif m.issym():
            pack_symlinks += 1
        else:
            pack_others += 1
    rpz_pack.close()

    meta_total_paths = 0
    meta_packed_packages_files = 0
    meta_unpacked_packages_files = 0
    meta_packages = len(packages)
    meta_packed_packages = 0
    for package in packages:
        nb = len(package.files)
        meta_total_paths += nb
        if package.packfiles:
            meta_packed_packages_files += nb
            meta_packed_packages += 1
        else:
            meta_unpacked_packages_files += nb
    nb = len(other_files)
    meta_total_paths += nb
    meta_packed_paths = meta_packed_packages_files + nb

    if runs:
        meta_architecture = runs[0]['architecture']
        if any(r['architecture'] != meta_architecture
               for r in runs):
            logging.warning("Runs have different architectures")
        meta_distribution = runs[0]['distribution']
        if any(r['distribution'] != meta_distribution
               for r in runs):
            logging.warning("Runs have different distributions")
        meta_distribution = ' '.join(t for t in meta_distribution if t)

    current_architecture = platform.machine().lower()
    current_distribution = platform.linux_distribution()[0:2]
    current_distribution = ' '.join(t for t in current_distribution if t)

    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))
    print("Unpacked size: %s" % hsize(pack_total_size))
    print("Total packed paths: %d" % pack_total_paths)
    if args.verbosity >= 3:
        print("    Files: %d" % pack_files)
        print("    Directories: %d" % pack_dirs)
        print("    Symbolic links: %d" % pack_symlinks)
    if pack_others:
        print("    Unknown (what!?): %d" % pack_others)
    print("\n----- Metadata -----")
    if args.verbosity >= 3:
        print("Total paths: %d" % meta_total_paths)
        print("Listed packed paths: %d" % meta_packed_paths)
    if packages:
        print("Total software packages: %d" % meta_packages)
        print("Packed software packages: %d" % meta_packed_packages)
        if args.verbosity >= 3:
            print("Files from packed software packages: %d" %
                  meta_packed_packages_files)
            print("Files from unpacked software packages: %d" %
                  meta_unpacked_packages_files)
    if runs:
        print("Architecture: %s (current: %s)" % (meta_architecture,
                                                  current_architecture))
        print("Distribution: %s (current: %s)" % (
              meta_distribution, current_distribution or "(not Linux)"))
        print("Executions (%d):" % len(runs))
        for i, run in enumerate(runs):
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) > 1:
                print("    %d: %s" % (i, cmdline))
            else:
                print("    %s" % cmdline)
            if args.verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])

    if inputs_outputs:
        if args.verbosity < 2:
            print("Inputs/outputs files (%d) :%s" % (
                  len(inputs_outputs), ", ".join(inputs_outputs)))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in iteritems(inputs_outputs):
                t = []
                if f.read_runs:
                    t.append("in")
                if f.write_runs:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f.path))

    # Unpacker compatibility
    print("\n----- Unpackers -----")
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                 (COMPAT_NO, "Incompatible")]:
        if s != COMPAT_OK and args.verbosity < 2:
            continue
        if s not in unpacker_status:
            continue
        upks = unpacker_status[s]
        print("%s (%d):" % (n, len(upks)))
        for upk_name, msg in upks:
            if msg is not None:
                print("    %s (%s)" % (upk_name, msg))
            else:
                print("    %s" % upk_name)
Exemple #48
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict((host, (guest, proto))
                         for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemple #49
0
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Exemple #50
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    use_chroot = read_dict(target / '.reprounzip').get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = [
            'chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
            for c in x11.init_cmds
        ] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = get_ssh_parameters(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin
                       or os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    sys.stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemple #52
0
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    members = rpz_pack.list_data()
    for m in members:
        # Remove 'DATA/' prefix
        m.name = str(rpz_pack.remove_data_prefix(m.name))
        # Makes symlink targets relative
        if m.issym():
            linkname = PosixPath(m.linkname)
            if linkname.is_absolute:
                m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    rpz_pack.extract_data(root, members)
    rpz_pack.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    input_files = [f.path for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs]
    if input_files:
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for ifile in input_files:
            inputtar.add(str(join_root(root, ifile)),
                         str(ifile))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target, pack=pack)