Example #1
0
def chroot_run(args):
    """Executes the selected runs inside the unpacked chroot.

    One shell command is assembled per selected run (change directory, then
    ``/usr/bin/env -i`` with the run's environment and command line), each
    wrapped in a ``chroot`` invocation. The X11 initialization commands are
    put in front, everything is chained with ``&&`` and handed to the shell.
    Afterwards the unpacked metadata is updated and written back.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'chroot')
    cmdline = args.cmdline

    # Reads the experiment configuration
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # Sets up the X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    run_cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        pieces = ['cd %s && ' % shell_escape(run['workingdir']),
                  '/usr/bin/env -i ']
        # The X11 handler adjusts the recorded environment first, then the
        # command-line options get their say
        environ = fixup_environment(x11.fix_env(run['environ']), args)
        pieces.append(' '.join('%s=%s' % (shell_escape(name),
                                          shell_escape(value))
                               for name, value in iteritems(environ)))
        pieces.append(' ')
        # FIXME : Use exec -a or something if binary != argv[0]
        argv = cmdline
        if argv is None:
            argv = [run['binary']] + run['argv'][1:]
        pieces.append(' '.join(shell_escape(arg) for arg in argv))
        inner = ''.join(pieces)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        run_cmds.append('chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec, shell_escape(unicode_(root)), shell_escape(inner)))

    # The X11 initialization commands run first, also inside the chroot
    init_cmds = []
    for init in x11.init_cmds:
        init_cmds.append('chroot %s /bin/sh -c %s' % (
            shell_escape(unicode_(root)), shell_escape(init)))
    full_command = ' && '.join(init_cmds + run_cmds)

    # Starts forwarding; the forwarders are held in a list for the duration
    # of the call (presumably to keep them alive -- TODO confirm)
    forwarders = [LocalForwarder(connector, portnum)
                  for portnum, connector in x11.port_forward]

    signals.pre_run(target=target)
    retcode = interruptible_call(full_command, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'chroot')
Example #2
0
def chroot_run(args):
    """Runs the command in the chroot.

    For each selected run, builds a shell command that changes to the run's
    working directory and executes its command line under ``/usr/bin/env -i``
    with the recorded environment (as adjusted by the X11 handler), wrapped
    in a ``chroot --userspec=...`` invocation. The X11 initialization
    commands are run first, also in the chroot. All commands are chained
    with ``&&`` and executed through the shell.

    :param args: parsed command-line arguments; this function reads
        ``target``, ``cmdline``, ``run``, ``x11`` and ``x11_display``.
    """
    target = Path(args.target[0])
    # Return value is not needed here
    # NOTE(review): presumably this checks that `target` was unpacked with
    # the 'chroot' unpacker -- confirm against read_dict()
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        # Escape the variable names as well as the values: both come from
        # the configuration file and end up in a string given to the shell
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Falls back to uid/gid 1000 if the run doesn't record them
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        # The whole command is escaped once more since it becomes a single
        # argument of `sh -c` inside the chroot
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
                userspec,
                shell_escape(unicode_(root)),
                shell_escape(cmd))
        cmds.append(cmd)
    # X11 initialization commands go before the runs
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Example #3
0
    def finalize(self):
        """Builds a new image containing the files queued in `docker_copy`.

        When nothing is queued, the build directory is simply removed.
        Otherwise a Dockerfile based on the current image is written, with a
        COPY for each queued file and a chown of all the copied files, and
        ``docker build`` is run on it. On success the new image becomes the
        current one in the unpacked info (the previous one is untagged unless
        it is the initial image); on failure the program exits with status 1.
        """
        if not self.docker_copy:
            # Nothing to upload, only clean up
            self.build_directory.rmtree()
            return

        from_image = self.unpacked_info['current_image']

        with self.build_directory.open('w',
                                       'Dockerfile',
                                       encoding='utf-8',
                                       newline='\n') as dockerfile:
            dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
            for source, dest in self.docker_copy:
                # FIXME : spaces in filenames will probably break Docker
                escaped_source = shell_escape(unicode_(source))
                escaped_dest = shell_escape(unicode_(dest))
                dockerfile.write('COPY \\\n    %s \\\n    %s\n' %
                                 (escaped_source, escaped_dest))

            if self.docker_copy:
                chown_args = ' \\\n    '.join(
                    shell_escape(unicode_(dest))
                    for _, dest in self.docker_copy)
                dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                                 '    %s\n' % chown_args)

            # TODO : restore permissions?

        image = make_unique_name(b'reprounzip_image_')
        build_cmd = self.docker_cmd + ['build', '-t', image, '.']
        retcode = subprocess.call(build_cmd, cwd=self.build_directory.path)
        if retcode != 0:
            logging.critical("docker build failed with code %d", retcode)
            sys.exit(1)

        logging.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            # The intermediate image is not needed anymore
            logging.info("Untagging previous image %s",
                         from_image.decode('ascii'))
            rmi_cmd = self.docker_cmd + ['rmi', from_image]
            rmi_status = subprocess.call(rmi_cmd)
            if rmi_status != 0:
                logging.warning(
                    "Can't remove previous image, docker "
                    "returned %d", rmi_status)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

        self.build_directory.rmtree()
Example #4
0
    def finalize(self):
        """Commits the queued file copies by building a new Docker image.

        Does nothing but remove the build directory if `docker_copy` is
        empty. Otherwise writes a Dockerfile (FROM the current image, one
        COPY per queued file, then a chown of the copied targets), runs
        ``docker build``, records the new image as the current one and
        removes the build directory. Exits with status 1 if the build fails.
        """
        if not self.docker_copy:
            self.build_directory.rmtree()
            return

        from_image = self.unpacked_info['current_image']

        with self.build_directory.open('w', 'Dockerfile',
                                       encoding='utf-8',
                                       newline='\n') as dockerfile:
            dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
            for local_path, image_path in self.docker_copy:
                # FIXME : spaces in filenames will probably break Docker
                copy_args = (shell_escape(unicode_(local_path)),
                             shell_escape(unicode_(image_path)))
                dockerfile.write('COPY \\\n    %s \\\n    %s\n' % copy_args)

            if self.docker_copy:
                # A single RUN changes the owner of every copied file
                escaped_targets = ' \\\n    '.join(
                    shell_escape(unicode_(image_path))
                    for local_path, image_path in self.docker_copy)
                dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                                 '    %s\n' % escaped_targets)

            # TODO : restore permissions?

        image = make_unique_name(b'reprounzip_image_')
        retcode = subprocess.call(
            self.docker_cmd + ['build', '-t', image, '.'],
            cwd=self.build_directory.path)
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)

        logger.info("New image created: %s", image.decode('ascii'))
        previous_is_initial = (
            from_image == self.unpacked_info['initial_image'])
        if not previous_is_initial:
            logger.info("Untagging previous image %s",
                        from_image.decode('ascii'))
            untag_status = subprocess.call(
                self.docker_cmd + ['rmi', from_image])
            if untag_status != 0:
                logger.warning("Can't remove previous image, docker "
                               "returned %d", untag_status)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

        self.build_directory.rmtree()
Example #5
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, level_pkgs, level_processes, level_other_files):
    """Dumps the graph to `target` as JSON, for further processing.

    The document has three keys: ``packages`` (sorted by name),
    ``other_files`` and ``runs``. Each edge's file is added to the ``reads``
    or ``writes`` list of the program it belongs to.
    """
    # Packages
    json_packages = []
    for pkg in packages:
        json_packages.append(pkg.json(level_pkgs))

    # Other files
    json_other_files = []
    for fi in sorted(other_files):
        json_other_files.append(unicode_(fi))

    # Programs; run.json() is handed prog_map, which the edges below index
    prog_map = {}
    json_runs = []
    for run in runs:
        json_runs.append(run.json(prog_map, level_processes))

    # Connect edges
    for prog, f, mode, argv in edges:
        filename = unicode_(f)
        if mode is None:
            # TODO: argv?
            access = 'reads'
        elif mode & FILE_WRITE:
            access = 'writes'
        elif mode & FILE_READ:
            access = 'reads'
        else:
            # Neither read nor written: nothing to record
            continue
        prog_map[prog][access].append(filename)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        document = {
            'packages': sorted(json_packages, key=lambda p: p['name']),
            'other_files': json_other_files,
            'runs': json_runs,
        }
        json.dump(document, fp, ensure_ascii=False, indent=2, sort_keys=True)
    finally:
        fp.close()
Example #6
0
 def json(self, level_pkgs):
     """Returns a JSON-serializable description of this package.

     :param level_pkgs: package level; only LVL_PKG_FILE produces output.
         LVL_PKG_PACKAGE logs a critical message and exits with status 1.
     :returns: a dict with the package's ``name``, ``version`` (None if
         falsy) and sorted list of ``files``.
     :raises AssertionError: for any other value of `level_pkgs`.
     """
     if level_pkgs == LVL_PKG_PACKAGE:
         logging.critical("JSON output doesn't support --packages package")
         sys.exit(1)
     elif level_pkgs == LVL_PKG_FILE:
         files = sorted(unicode_(f) for f in self.files)
     else:
         # Raised explicitly rather than with ``assert False``: assert
         # statements are removed under -O, which would let execution fall
         # through to the return with `files` unbound
         raise AssertionError("unsupported package level for JSON output: "
                              "%r" % (level_pkgs,))
     return {'name': self.name, 'version': self.version or None,
             'files': files}
Example #7
0
 def json(self, level_pkgs):
     """Returns a JSON-serializable description of this package.

     :param level_pkgs: package level; only LVL_PKG_FILE produces output.
         LVL_PKG_PACKAGE logs a critical message and exits with status 1.
     :returns: a dict with the package's ``name``, ``version`` (None if
         falsy), ``section`` and sorted list of ``files``.
     :raises AssertionError: for any other value of `level_pkgs`.
     """
     if level_pkgs == LVL_PKG_PACKAGE:
         logger.critical("JSON output doesn't support --packages package")
         sys.exit(1)
     elif level_pkgs == LVL_PKG_FILE:
         files = sorted(unicode_(f) for f in self.files)
     else:
         # Raised explicitly rather than with ``assert False``: assert
         # statements are removed under -O, which would let execution fall
         # through to the return with `files` unbound
         raise AssertionError("unsupported package level for JSON output: "
                              "%r" % (level_pkgs,))
     return {'name': self.name, 'version': self.version or None,
             'section': self.section, 'files': files}
Example #8
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, level_pkgs, level_processes, level_other_files):
    """Serializes packages, other files and runs to a JSON file at `target`.

    Files appearing in `edges` are appended to the ``reads`` or ``writes``
    list of the corresponding program before the document is dumped.
    """
    # Packages
    json_packages = [package.json(level_pkgs) for package in packages]

    # Other files
    json_other_files = [unicode_(path) for path in sorted(other_files)]

    # Programs; run.json() is handed prog_map, which the edges below index
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    for prog, f, mode, argv in edges:
        what = unicode_(f)
        # TODO: argv? (for edges whose mode is None)
        if mode is not None and mode & FILE_WRITE:
            prog_map[prog]['writes'].append(what)
        elif mode is None or mode & FILE_READ:
            prog_map[prog]['reads'].append(what)

    json_other_files.sort()

    # Text file on Python 3, binary file on Python 2
    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        packages_by_name = sorted(json_packages, key=lambda p: p['name'])
        json.dump({'packages': packages_by_name,
                   'other_files': json_other_files,
                   'runs': json_runs},
                  fp,
                  ensure_ascii=False,
                  indent=2,
                  sort_keys=True)
    finally:
        fp.close()
Example #9
0
 def dot(self, fp, level_processes, indent=1):
     """Writes this process to `fp` in DOT syntax.

     A node line is always written; if the process has a parent, an edge
     from the parent is written too, labelled with how the process was
     created. Each line is prefixed by `indent` groups of four spaces.
     `level_processes` is not used here.
     """
     if self.thread:
         thread_style = ',fillcolor="#666666"'
     else:
         thread_style = ''
     margin = '    ' * indent
     node = 'prog%d [label="%s (%d)"%s];\n' % (
         self.id, escape(unicode_(self.binary) or "-"),
         self.pid, thread_style)
     fp.write(margin + node)

     if self.parent is None:
         return

     # Label of the edge coming from the parent
     if self.created == C_FORK:
         reason = "thread" if self.thread else "fork"
     elif self.created == C_EXEC:
         reason = "exec"
     elif self.created == C_FORKEXEC:
         reason = "fork+exec"
     else:
         reason = ''
     edge = 'prog%d -> prog%d [label="%s"];\n' % (
         self.parent.id, self.id, reason)
     fp.write(margin + edge)
Example #10
0
 def dot(self, fp, level_processes, indent=1):
     """Emits the DOT description of this process on `fp`.

     Writes the node for the process, then, when it has a parent, the edge
     from that parent labelled with the creation reason ("thread", "fork",
     "exec", "fork+exec" or nothing). `indent` is the number of four-space
     groups put in front of each line; `level_processes` is not used here.
     """
     thread_style = ',fillcolor="#666666"' if self.thread else ''
     prefix = '    ' * indent
     label = escape(unicode_(self.binary) or "-")
     fp.write(prefix + 'prog%d [label="%s (%d)"%s];\n' %
              (self.id, label, self.pid, thread_style))
     if self.parent is not None:
         if self.created == C_FORK:
             # A fork is either a new thread or a new process
             if self.thread:
                 reason = "thread"
             else:
                 reason = "fork"
         elif self.created == C_EXEC:
             reason = "exec"
         elif self.created == C_FORKEXEC:
             reason = "fork+exec"
         else:
             reason = ''
         fp.write(prefix + 'prog%d -> prog%d [label="%s"];\n' %
                  (self.parent.id, self.id, reason))
Example #11
0
 def filefilter(path):
     """Applies the ignore, replace and aggregate rules to `path`.

     Returns None if any `ignore` predicate matches, `path` itself if there
     are neither replacements nor aggregates, and otherwise a PosixPath
     built from the path after every `replace` function has been applied
     and the first matching `aggregates` prefix substituted.
     """
     pathuni = unicode_(path)
     for is_ignored in ignore:
         if is_ignored(pathuni):
             logging.debug("IGN %s", pathuni)
             return None
     if not replace and not aggregates:
         return path
     # Replacements are chained: each one sees the previous one's result
     for substitute in replace:
         replaced = substitute(pathuni)
         if replaced != pathuni:
             logging.debug("SUB %s -> %s", pathuni, replaced)
         pathuni = replaced
     # Only the first matching prefix is used
     for prefix in aggregates or []:
         if not pathuni.startswith(prefix):
             continue
         logging.debug("AGG %s -> %s", pathuni, prefix)
         pathuni = prefix
         break
     return PosixPath(pathuni)
Example #12
0
 def filefilter(path):
     """Filters one path through the ignore/replace/aggregate rules.

     The result is None for an ignored path, the original `path` object
     when no replacement or aggregate is configured, or a new PosixPath
     holding the rewritten path.
     """
     pathuni = unicode_(path)
     ignored = any(matcher(pathuni) for matcher in ignore)
     if ignored:
         logging.debug("IGN %s", pathuni)
         return None
     if not replace and not aggregates:
         return path
     for rewrite in replace:
         previous, pathuni = pathuni, rewrite(pathuni)
         if pathuni != previous:
             logging.debug("SUB %s -> %s", previous, pathuni)
     # Collapse the path onto the first aggregate prefix it starts with
     matching = next((prefix for prefix in aggregates or []
                      if pathuni.startswith(prefix)),
                     None)
     if matching is not None:
         logging.debug("AGG %s -> %s", pathuni, matching)
         pathuni = matching
     return PosixPath(pathuni)
Example #13
0
    def dot(self, fp, level_pkgs):
        """Writes this package to `fp` in DOT syntax.

        Nothing is written for a package without files. With
        LVL_PKG_PACKAGE the package is a single box node; with LVL_PKG_FILE
        it is a cluster subgraph listing its files in sorted order. The
        label is the package name, followed by the version if there is one.
        """
        assert self.id is not None
        if not self.files:
            return

        def quoted_label():
            # Quoted "name version" label, or just "name" without a version
            if self.version:
                return '"%s %s"' % (escape(self.name), escape(self.version))
            return '"%s"' % escape(self.name)

        if level_pkgs == LVL_PKG_PACKAGE:
            fp.write('    "pkg %s" [shape=box,label=' % escape(self.name))
            fp.write(quoted_label() + '];\n')
        elif level_pkgs == LVL_PKG_FILE:
            fp.write('    subgraph cluster_pkg%d {\n        label=' % self.id)
            fp.write(quoted_label() + ';\n')
            filenames = sorted(unicode_(fi) for fi in self.files)
            for filename in filenames:
                fp.write('        "%s";\n' % escape(filename))
            fp.write('    }\n')
Example #14
0
    def dot(self, fp, level_pkgs):
        """Emits the DOT description of this package on `fp`.

        A package with no files produces no output. Otherwise the output is
        a box node (LVL_PKG_PACKAGE) or a cluster subgraph with one quoted
        entry per file, sorted (LVL_PKG_FILE). Any other level writes
        nothing.
        """
        assert self.id is not None
        if not self.files:
            return

        def label_text():
            # Package name, plus the version when it is known
            if not self.version:
                return '"%s"' % escape(self.name)
            return '"%s %s"' % (escape(self.name), escape(self.version))

        if level_pkgs == LVL_PKG_PACKAGE:
            fp.write('    "pkg %s" [shape=box,label=' % escape(self.name))
            fp.write('%s];\n' % label_text())
        elif level_pkgs == LVL_PKG_FILE:
            fp.write('    subgraph cluster_pkg%d {\n        label=' % self.id)
            fp.write('%s;\n' % label_text())
            for entry in sorted(unicode_(fi) for fi in self.files):
                fp.write('        "%s";\n' % escape(entry))
            fp.write('    }\n')
Example #15
0
def chroot_run(args):
    """Runs the command in the chroot.

    For each selected run, builds a shell command that changes to the run's
    working directory and executes its command line under ``/usr/bin/env -i``
    with the recorded environment, wrapped in a ``chroot --userspec=...``
    invocation. The commands are chained with ``&&`` and executed through
    the shell.

    :param args: parsed command-line arguments; this function reads
        ``target``, ``cmdline`` and ``run``.
    """
    target = Path(args.target[0])
    # Return value is not needed here
    # NOTE(review): presumably this checks that `target` was unpacked with
    # the 'chroot' unpacker -- confirm against read_dict()
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        # Escape the variable names as well as the values: both come from
        # the configuration file and end up in a string given to the shell
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Falls back to uid/gid 1000 if the run doesn't record them
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        # The whole command is escaped once more since it becomes a single
        # argument of `sh -c` inside the chroot
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec, shell_escape(unicode_(root)), shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Example #16
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map,
               level_pkgs, level_processes, level_other_files):
    """Dumps the graph to `target` as JSON, for further processing.

    The document has the keys ``packages``, ``other_files``, ``runs`` and
    ``inputs_outputs``. Edges to files found in `package_map` are dropped
    with LVL_PKG_DROP, and otherwise recorded once per
    (program name, endpoint, mode) combination.
    """
    # Packages
    if level_pkgs not in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = [pkg.json(level_pkgs) for pkg in packages]
    else:
        json_packages = []

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs; run.json() is handed prog_map, which the edges below index
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    seen_edges = set()
    for prog, fi, mode, argv in edges:
        endp_prog = prog_map[prog]
        if fi not in package_map:
            endp_file = unicode_(fi)
        elif level_pkgs == LVL_PKG_DROP:
            continue
        else:
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            # Skip edges to packaged files that were already recorded
            edge_key = (endp_prog['name'], endp_file, mode)
            if edge_key in seen_edges:
                continue
            seen_edges.add(edge_key)

        if mode is None:
            access = 'reads'
        elif mode & FILE_WRITE:
            access = 'writes'
        elif mode & FILE_READ:
            access = 'reads'
        else:
            # Neither read nor written: nothing to record
            continue
        endp_prog[access].append(endp_file)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        packages_by_name = sorted(json_packages, key=lambda p: p['name'])
        json_inputs_outputs = []
        for name, io_file in sorted(iteritems(inputs_outputs)):
            json_inputs_outputs.append({
                'name': name,
                'path': unicode_(io_file.path),
                'read_by_runs': io_file.read_runs,
                'written_by_runs': io_file.write_runs,
            })
        json.dump({'packages': packages_by_name,
                   'other_files': json_other_files,
                   'runs': json_runs,
                   'inputs_outputs': json_inputs_outputs},
                  fp,
                  ensure_ascii=False,
                  indent=2,
                  sort_keys=True)
    finally:
        fp.close()
Example #17
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    The target directory receives ``config.yml``, ``setup.sh``, ``data.tgz``,
    ``Vagrantfile`` and, depending on the mode, ``rpz-files.list`` or
    ``busybox``. If anything fails once the directory has been created, it
    is removed and the exception is re-raised.

    :param args: parsed command-line arguments; this function reads
        ``pack``, ``target``, ``use_chroot``, ``bind_magic_dirs``,
        ``memory``, ``base_image`` and ``distribution``.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    # Memory size; if the option was given several times, the last one wins
    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    # Box: either given explicitly or selected from the runs
    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # Without an installer the setup script can't be written; stop
            # here, before the target directory is created, rather than
            # fail later on the unbound `installer` variable
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        # NOTE(review): this creates the parent of the
                        # source file, not of `dest` -- looks like it was
                        # meant to be dest.parent; confirm before changing
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                # The list is NUL-separated, to go with tar's --null option
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                # The architecture is taken from the first run
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None:
                fp.write('  config.vm.provider "virtualbox" do |v|\n'
                         '    v.memory = %d\n'
                         '  end\n' % memory)

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
Example #18
0
def directory_run(args):
    """Runs the command in the directory.

    The runs are executed directly on the host, from the files unpacked
    under ``<target>/root``. To make them find their files,
    ``LD_LIBRARY_PATH`` is set to the library directories reported by
    ``ldconfig`` relocated under the root, the relocated ``PATH`` entries
    are put in front of the original ones, and command-line arguments that
    are absolute paths existing under the root are rewritten. The commands
    are chained with ``&&`` and executed through the shell; afterwards the
    unpacked metadata is updated and written back.

    :param args: parsed command-line arguments; this function reads
        ``target``, ``cmdline``, ``run`` and ``x11`` (and passes `args` to
        fixup_environment()).
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Slice instead of indexing: on Python 3, l[0] is an int and
            # would never compare equal to a bytes object
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            # Directory lines end with a colon
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        # communicate() drains and closes the pipe before waiting, so this
        # can't block on unread output and doesn't leak the file object
        p.communicate()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' %
        ':'.join(shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Passes the host's X11 settings through to the experiment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        # PATH is left out here, it is rebuilt below
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ) if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [
            PosixPath(d) for d in run['environ'].get('PATH', '').split(':')
        ]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            # NOTE(review): this is the list from the config, so the
            # rewriting below modifies run['argv'] in place
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                # is_absolute is read as an attribute, not called;
                # presumably a property of this Path class -- TODO confirm
                if p.is_absolute:
                    rp = join_root(root, p)
                    # Rewrite if the file is in the root, or if its parent
                    # directory is (and the path is deep enough)
                    if (rp.exists() or
                        (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logger.warning("Rewrote command-line as: %s",
                               ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
Example #19
0
def directory_run(args):
    """Runs the command in the directory.

    Builds a single shell command line: an ``export LD_LIBRARY_PATH=...``
    made of the directories reported by ``ldconfig`` relocated under the
    unpacked root, followed by one ``cd ... && /usr/bin/env -i ...`` command
    per selected run, all chained with ``&&``. The result is executed through
    ``interruptible_call()`` and the run metadata is then updated and written
    back.

    :param args: parsed command-line arguments; this function reads
        ``target``, ``cmdline``, ``run`` and ``x11`` and hands the whole
        object to ``fixup_environment()``.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Slice rather than index: on Python 3, l[0] is an int and would
            # never compare equal to b' ' or b'\t'
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            # Only lines ending with a colon are kept, minus the ':\n'
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.communicate()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
                shell_escape(unicode_(join_root(root, d)))
                for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root,
                               Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Forwards the caller's X11 settings, when they are set
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        # PATH is left out here, it is rebuilt below
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string: relocated directories first, then the originals
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            # NOTE(review): this is the list stored in the config, so the
            # rewrite below modifies it in place -- confirm that is intended
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    # Rewrite if the relocated path exists, or if its parent
                    # does and the path has more than 3 components
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
Example #20
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts ``config.yml`` from the pack into the target directory, copies
    the pack there as ``experiment.rpz``, downloads busybox next to it and
    writes a ``Dockerfile`` whose single ``RUN`` step installs packages and
    untars the experiment files.

    :param args: parsed command-line arguments; this function reads ``pack``,
        ``target``, ``base_image``, ``distribution`` and ``install_pkgs``.

    Exits with status 1 if the target already exists or if no package
    installer can be selected.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        # Renamed so it is extracted directly as <target>/config.yml
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        # Close the archive even if the member is missing or extraction fails
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8', newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        # Everything below is appended to this one RUN instruction
        fp.write('RUN \\\n'
                 '    chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed; the files of the
            # packed ones are extracted from the archive below
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]
        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        try:
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            for f in chain(other_files, missing_files):
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
        finally:
            # Close the archive even if the walk above raises
            tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
Example #21
0
def directory_run(args):
    """Runs the command in the directory.

    Builds a single shell command line: an ``export LD_LIBRARY_PATH=...``
    made of the directories reported by ``ldconfig`` relocated under the
    unpacked root, followed by one ``cd ... && /usr/bin/env -i ...`` command
    per selected run, all chained with ``&&``, and executes it with
    ``subprocess.call(..., shell=True)``.

    :param args: parsed command-line arguments; this function reads
        ``target``, ``cmdline`` and ``run``.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Slice rather than index: on Python 3, l[0] is an int and would
            # never compare equal to b' ' or b'\t'
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            # Only lines ending with a colon are kept, minus the ':\n'
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        # communicate() drains the pipe before waiting, so the child cannot
        # block on a full pipe if the loop above stopped early
        p.communicate()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' %
        ':'.join(shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        # Variable names are escaped as well as values, since both end up in
        # a command line interpreted by the shell; PATH is rebuilt below
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(run['environ']) if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [
            PosixPath(d) for d in run['environ'].get('PATH', '').split(':')
        ]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string: relocated directories first, then the originals
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
Example #22
0
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and the configuration found in `directory` and
    writes a GraphViz DOT description of programs, files and the edges
    between them to `target`.

    :param target: path object of the DOT file to write (opened for writing
        as UTF-8).
    :param directory: path object of the directory holding ``trace.sqlite3``
        and ``config.yml``.
    :param all_forks: if True, an exec always creates a new program node,
        even when the process has not acted since it was forked.

    Exits with status 1 if the configuration file does not exist.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    # Maps each file path to the package that owns it
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
            '''
            SELECT id, parent, timestamp
            FROM processes
            ORDER BY id
            ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
            '''
            SELECT name, timestamp, mode, process
            FROM opened_files
            ORDER BY id
            ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
            '''
            SELECT name, timestamp, process, argv
            FROM executed_files
            ORDER BY id
            ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                # A forked process starts with its parent's binary
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            # argv is stored NUL-separated; drop the empty trailing element
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    exec_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            # The binary path goes inside a quoted DOT label, so it is
            # escaped like every other label written below
            if program.binary:
                binary_label = escape(unicode_(program.binary))
            else:
                binary_label = "-"
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), binary_label, program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), pkg_files) in enumerate(
                iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in pkg_files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                # Edges recorded from exec events carry no mode but an argv
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Example #23
0
 def dot_endpoint(self, f, level_pkgs):
     """Returns the quoted DOT node name to use for file `f`.

     At level LVL_PKG_PACKAGE the node is named after this object's
     ``name``; at any other level it is named after the file itself.
     """
     if level_pkgs != LVL_PKG_PACKAGE:
         return '"%s"' % escape(unicode_(f))
     return '"pkg %s"' % escape(self.name)
Example #24
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: parsed command-line arguments; this function reads ``pack``,
        ``target``, ``use_chroot``, ``bind_magic_dirs``, ``base_image`` and
        ``distribution``.

    Exits with status 1 if no pack was given, if the target already exists,
    or if packages must be installed but no installer can be selected.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        # Renamed so it is extracted directly as <target>/config.yml
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        # Close the archive even if the member is missing or extraction fails
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s", len(packages), e)
            # The setup script below needs the installer; stop here instead
            # of failing later on an unbound variable
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n' 'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    # Create the directory the file is copied INTO: the
                    # script runs with 'set -e', so a missing destination
                    # directory would make the cp below abort it
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(dest.parent)))
                    fp.write('cp -L %s %s\n' % (shell_escape(
                        unicode_(f)), shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            try:
                for f in other_files:
                    path = PosixPath('/')
                    for c in f.path.components[1:]:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        datapath = join_root(dataroot, path)
                        try:
                            tar.getmember(str(datapath))
                        except KeyError:
                            logging.info("Missing file %s", datapath)
                        else:
                            pathlist.append(unicode_(datapath))
            finally:
                # Close the archive even if the walk above raises
                tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Example #25
0
 def json_endpoint(self, f, level_pkgs):
     """Returns the name under which file `f` is listed in the JSON output.

     At level LVL_PKG_PACKAGE this is this object's ``name``; at any other
     level it is the file's own path as a unicode string.
     """
     if level_pkgs != LVL_PKG_PACKAGE:
         return unicode_(f)
     return self.name
Example #26
0
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map, level_pkgs, level_processes,
               level_other_files):
    """Dumps the collected graph as a JSON document into `target`.

    The document has four keys: ``packages``, ``other_files``, ``runs`` and
    ``inputs_outputs``. Each edge is recorded in the ``reads`` or ``writes``
    list of the program entry it is attached to.

    `inputs_outputs_map` and `level_other_files` are accepted but not used
    here.
    """
    # Packages: left out entirely at the IGNORE and DROP levels
    json_packages = []
    if level_pkgs not in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs; run.json() also receives prog_map, which the edge loop below
    # indexes by program
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    seen_edges = set()
    for prog, fi, mode, _argv in edges:
        endp_prog = prog_map[prog]
        if fi not in package_map:
            endp_file = unicode_(fi)
        elif level_pkgs == LVL_PKG_DROP:
            continue
        else:
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            # Edges to packaged files are only recorded once per
            # (program, endpoint, mode)
            edge_key = endp_prog['name'], endp_file, mode
            if edge_key in seen_edges:
                continue
            seen_edges.add(edge_key)

        # TODO: argv?
        if mode is None:
            direction = 'reads'
        elif mode & FILE_WRITE:
            direction = 'writes'
        elif mode & FILE_READ:
            direction = 'reads'
        else:
            continue
        endp_prog[direction].append(endp_file)

    json_other_files.sort()

    # Text mode on Python 3, binary mode otherwise
    fp = (target.open('w', encoding='utf-8', newline='\n') if PY3
          else target.open('wb'))
    try:
        document = {
            'packages': sorted(json_packages, key=lambda p: p['name']),
            'other_files': json_other_files,
            'runs': json_runs,
            'inputs_outputs': [
                {'name': k,
                 'path': unicode_(v.path),
                 'read_by_runs': v.read_runs,
                 'written_by_runs': v.write_runs}
                for k, v in sorted(iteritems(inputs_outputs))],
        }
        json.dump(document, fp, ensure_ascii=False, indent=2, sort_keys=True)
    finally:
        fp.close()
Example #27
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: parsed command-line arguments; this function reads ``pack``,
        ``target``, ``use_chroot``, ``bind_magic_dirs``, ``base_image`` and
        ``distribution``.

    Exits with status 1 if no pack was given, if the target already exists,
    or if packages must be installed but no installer can be selected.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        # Renamed so it is extracted directly as <target>/config.yml
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        # Close the archive even if the member is missing or extraction fails
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # The setup script below needs the installer; stop here instead
            # of failing later on an unbound variable
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    # Create the directory the file is copied INTO: the
                    # script runs with 'set -e', so a missing destination
                    # directory would make the cp below abort it
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(dest.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            try:
                for f in other_files:
                    path = PosixPath('/')
                    for c in f.path.components[1:]:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        datapath = join_root(dataroot, path)
                        try:
                            tar.getmember(str(datapath))
                        except KeyError:
                            logging.info("Missing file %s", datapath)
                        else:
                            pathlist.append(unicode_(datapath))
            finally:
                # Close the archive even if the walk above raises
                tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Example #28
0
 def dot_endpoint(self, f, level_pkgs):
     """Gives the quoted DOT identifier an edge to file `f` should use.

     The identifier is built from this object's ``name`` when `level_pkgs`
     is LVL_PKG_PACKAGE, and from the file's path otherwise.
     """
     if level_pkgs == LVL_PKG_PACKAGE:
         label = 'pkg %s' % escape(self.name)
     else:
         label = '%s' % escape(unicode_(f))
     return '"' + label + '"'
Example #29
0
def graph_dot(target, runs, packages, other_files, package_map, edges,
              inputs_outputs, level_pkgs, level_processes, level_other_files):
    """Serializes the collected graph to `target` in GraphViz DOT syntax.

    The sections are emitted in a fixed order: program nodes, package nodes
    (skipped when `level_pkgs` is LVL_PKG_IGNORE or LVL_PKG_DROP), the other
    file nodes, and finally the edges between programs and files.

    `level_other_files` is accepted but not used by this function.
    """
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        emit = fp.write

        # Program nodes
        emit('digraph G {\n    /* programs */\n'
             '    node [shape=box fontcolor=white '
             'fillcolor=black style=filled];\n')
        logging.info("Writing programs...")
        for run in runs:
            run.dot(fp, level_processes)

        # Every node declared from here on is a file or a package
        emit('\n'
             '    node [shape=ellipse fontcolor="#131C39" '
             'fillcolor="#C9D2ED"];\n')

        # Package nodes
        if level_pkgs not in (LVL_PKG_IGNORE, LVL_PKG_DROP):
            logging.info("Writing packages...")
            emit('\n    /* system packages */\n')
            for package in sorted(packages, key=lambda p: p.name):
                package.dot(fp, level_pkgs)

        # Nodes for the files that are not displayed through a package
        emit('\n    /* other files */\n')
        logging.info("Writing other files...")
        for path in sorted(other_files):
            if path not in inputs_outputs:
                emit('    "%s";\n' % escape(unicode_(path)))
                continue
            # Named input/output files get their own color and label
            attrs = {'path': escape(unicode_(path)),
                     'name': inputs_outputs[path]}
            emit('    "%(path)s" [fillcolor="#A3B4E0", '
                 'label="%(name)s\\n%(path)s"];\n' % attrs)

        emit('\n')

        # Edges
        logging.info("Connecting edges...")
        seen = set()
        for prog, path, mode, argv in edges:
            prog_node = prog.dot_endpoint(level_processes)
            if path not in package_map:
                file_node = '"%s"' % escape(unicode_(path))
            elif level_pkgs == LVL_PKG_DROP:
                # Packaged files are not shown at all at this level
                continue
            else:
                file_node = package_map[path].dot_endpoint(path, level_pkgs)
                # Several files can share one package node: only draw each
                # (program, node, mode) edge once
                key = (prog_node, file_node, mode)
                if key in seen:
                    continue
                seen.add(key)

            if mode is None:
                # Execution: the edge goes from the binary to the program
                line = '    %s -> %s [style=bold, label="%s"];\n' % (
                    file_node, prog_node, escape(format_argv(argv)))
            elif mode & FILE_WRITE:
                line = '    %s -> %s [color="#000088"];\n' % (
                    prog_node, file_node)
            elif mode & FILE_READ:
                line = '    %s -> %s [color="#8888CC"];\n' % (
                    file_node, prog_node)
            else:
                # Neither read nor write: no edge is drawn
                continue
            emit(line)

        emit('}\n')
Example #30
0
def graph_dot(target, runs, packages, other_files, package_map, edges,
              inputs_outputs, level_pkgs, level_processes, level_other_files):
    """Writes the collected information to `target` as a GraphViz DOT graph.

    Programs are written first, then packages (unless `level_pkgs` is
    LVL_PKG_IGNORE or LVL_PKG_DROP), then the remaining files, then the edges.
    The `level_other_files` argument is not read here.
    """
    def file_node_line(fi):
        # One node declaration; files listed in inputs_outputs are
        # highlighted and labelled with their name
        quoted = escape(unicode_(fi))
        if fi in inputs_outputs:
            return ('    "%(path)s" [fillcolor="#A3B4E0", '
                    'label="%(name)s\\n%(path)s"];\n' % {
                        'path': quoted,
                        'name': inputs_outputs[fi]
                    })
        return '    "%s";\n' % quoted

    draw_packages = (level_pkgs != LVL_PKG_IGNORE and
                     level_pkgs != LVL_PKG_DROP)

    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n'
                 '    node [shape=box fontcolor=white '
                 'fillcolor=black style=filled];\n')

        # Programs
        logging.info("Writing programs...")
        for run in runs:
            run.dot(fp, level_processes)

        fp.write('\n'
                 '    node [shape=ellipse fontcolor="#131C39" '
                 'fillcolor="#C9D2ED"];\n')

        # Packages
        if draw_packages:
            logging.info("Writing packages...")
            fp.write('\n    /* system packages */\n')
            by_name = sorted(packages, key=lambda pkg: pkg.name)
            for package in by_name:
                package.dot(fp, level_pkgs)

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for fi in sorted(other_files):
            fp.write(file_node_line(fi))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        drawn = set()
        for prog, fi, mode, argv in edges:
            endp_prog = prog.dot_endpoint(level_processes)
            packaged = fi in package_map
            if packaged and level_pkgs == LVL_PKG_DROP:
                # Files from packages are left out entirely
                continue
            if packaged:
                endp_file = package_map[fi].dot_endpoint(fi, level_pkgs)
                # Files of one package may map to the same node; avoid
                # writing the same edge twice
                signature = (endp_prog, endp_file, mode)
                if signature in drawn:
                    continue
                drawn.add(signature)
            else:
                endp_file = '"%s"' % escape(unicode_(fi))

            # Pick the edge direction and style from the access mode
            if mode is None:
                template = '    %s -> %s [style=bold, label="%s"];\n'
                values = (endp_file, endp_prog, escape(format_argv(argv)))
            elif mode & FILE_WRITE:
                template = '    %s -> %s [color="#000088"];\n'
                values = (endp_prog, endp_file)
            elif mode & FILE_READ:
                template = '    %s -> %s [color="#8888CC"];\n'
                values = (endp_file, endp_prog)
            else:
                continue
            fp.write(template % values)

        fp.write('}\n')
Example #31
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Exits with status 1 if the pack filename is missing, the target already
    exists, the memory size is not an integer, or packages have to be
    installed but no package installer can be selected. If an exception is
    raised after the target directory has been created, the directory is
    removed and the exception is re-raised.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            # If the option was given several times, the last value wins
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            # Without an installer the setup script cannot be generated:
            # stop here instead of failing later on an unbound `installer`
            logging.critical("Need to install %d packages but couldn't "
                             "select a package installer: %s",
                             len(packages), e)
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        # The directory that has to exist is the one under
                        # /experimentroot that cp writes into, not the
                        # directory of the installed source file
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(dest.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    # Don't unpack the packed resolv.conf over the VM's own
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                # The list is NUL-separated, matching tar's --null -T below
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
Example #32
0
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.

    Reads the package ownership from `configfile` and the recorded events
    from the SQLite `database`, then writes a GraphViz DOT file to `target`.

    :param target: path object of the DOT file to write (opened for writing).
    :param configfile: path object of the configuration file; the process
        exits with status 1 if it is not an existing file.
    :param database: path object of the trace database.
    :param all_forks: if True, an exec always starts a new program node; if
        False, the first exec of a forked process is merged with the fork
        into a single "fork+exec" program.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    # Maps each packaged file's path to the package that owns it
    packages = {f.path: pkg for pkg in packages for f in pkg.files}

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    processes = {}
    all_programs = []
    files = OrderedSet()
    edges = OrderedSet()

    process_cursor = conn.cursor()
    file_cursor = conn.cursor()
    exec_cursor = conn.cursor()
    try:
        # This is a bit weird. We need to iterate on all types of events at
        # the same time, ordering by timestamp, so we decorate-sort-undecorate
        # Decoration adds timestamp (for sorting) and tags by event type, one
        # of 'process', 'open' or 'exec'

        # Reads processes from the database
        process_rows = process_cursor.execute(
            '''
            SELECT id, parent, timestamp
            FROM processes
            ORDER BY id
            ''')

        # ... and opened files...
        file_rows = file_cursor.execute(
            '''
            SELECT name, timestamp, mode, process
            FROM opened_files
            ORDER BY id
            ''')

        # ... as well as executed files.
        exec_rows = exec_cursor.execute(
            '''
            SELECT name, timestamp, process, argv
            FROM executed_files
            ORDER BY id
            ''')

        # Loop on all event lists
        logging.info("Getting all events from database...")
        rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                           ((r[1], 'open', r) for r in file_rows),
                           ((r[1], 'exec', r) for r in exec_rows))
        for ts, event_type, data in rows:
            if event_type == 'process':
                r_id, r_parent, r_timestamp = data
                if r_parent is not None:
                    # A forked process starts with its parent's binary
                    parent = processes[r_parent]
                    binary = parent.binary
                else:
                    parent = None
                    binary = None
                p = Process(r_id,
                            parent,
                            r_timestamp,
                            False,
                            binary,
                            C_INITIAL if r_parent is None else C_FORK)
                processes[r_id] = p
                all_programs.append(p)

            elif event_type == 'open':
                r_name, r_timestamp, r_mode, r_process = data
                r_name = PosixPath(r_name)
                if r_mode != FILE_WDIR:
                    process = processes[r_process]
                    files.add(r_name)
                    edges.add((process, r_name, r_mode, None))

            elif event_type == 'exec':
                r_name, r_timestamp, r_process, r_argv = data
                r_name = PosixPath(r_name)
                process = processes[r_process]
                # Here we split this process in two "programs", unless the
                # previous one hasn't done anything since it was created via
                # fork()
                if not all_forks and not process.acted:
                    process.binary = r_name
                    process.created = C_FORKEXEC
                    process.acted = True
                else:
                    process = Process(process.pid,
                                      process,
                                      r_timestamp,
                                      True,         # Hides exec only once
                                      r_name,
                                      C_EXEC)
                    all_programs.append(process)
                    # Later events of this pid belong to the new program
                    processes[r_process] = process
                # argv is stored NUL-separated; drop the empty last field
                argv = tuple(r_argv.split('\0'))
                if not argv[-1]:
                    argv = argv[:-1]
                edges.add((process, r_name, None, argv))
    finally:
        # Release the database even if an event could not be processed
        process_cursor.close()
        file_cursor.close()
        exec_cursor.close()
        conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            # The binary ends up inside a quoted DOT string, so it has to be
            # escaped like every other label
            if program.binary:
                label = escape(unicode_(program.binary))
            else:
                label = "-"
            # id() gives a node name unique to each program object
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), label, program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), pkg_files) in enumerate(
                iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in pkg_files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                # Execution: from the binary to the program it started
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Example #33
0
 def json_endpoint(self, f, level_pkgs):
     """Return the identifier used for file `f` in the JSON output.

     This is this object's name at the LVL_PKG_PACKAGE level, and the
     unicode form of `f` at any other level.
     """
     return self.name if level_pkgs == LVL_PKG_PACKAGE else unicode_(f)