Example #1
0
def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there are more than one runs
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there are more than one file arguments
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
Example #2
0
def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there are more than one runs
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there are more than one file arguments
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
Example #3
0
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if oldconfig:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
        inputs = inputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles,
                                      oldpkgs)
    else:
        runs = []
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id;
                ''')
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1
                                     for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1
                                      for out_file in input_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})
    cur.close()

    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
Example #4
0
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if not oldconfig:
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL;
                ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        # Same query as previous block but only gets last process
        executions = cur.execute(
                '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
        inputs = inputs[-1:]
        outputs = outputs[-1:]

        files, packages = merge_files(files, packages,
                                      oldfiles,
                                      oldpkgs)
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1
                                     for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1
                                      for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})
    cur.close()

    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")