예제 #1
0
파일: pack.py 프로젝트: Aloma/reprozip
def canonicalize_config(runs, packages, other_files, additional_patterns,
                        sort_packages):
    """Folds the files matched by ``additional_patterns`` into the config.

    The patterns are expanded with ``expand_patterns()``. If ``sort_packages``
    is true, the resulting files are split by ``identify_packages()``;
    otherwise no additional packages are considered. The result is then
    merged with ``other_files`` and ``packages`` by ``merge_files()``.

    :returns: The tuple ``(runs, packages, other_files)``; ``runs`` is passed
        through untouched.
    """
    matched_files = expand_patterns(additional_patterns)

    # Without package sorting, everything stays in the loose file list
    matched_packages = []
    if sort_packages:
        matched_files, matched_packages = identify_packages(matched_files)

    merged = merge_files(matched_files, matched_packages,
                         other_files, packages)
    other_files, packages = merged
    return runs, packages, other_files
예제 #2
0
파일: pack.py 프로젝트: koconne8/reprozip
def canonicalize_config(packages, other_files, additional_patterns,
                        sort_packages):
    """Folds the files matched by ``additional_patterns`` into the config.

    The patterns are expanded with ``expand_patterns()``; when
    ``sort_packages`` is true the expanded files go through
    ``identify_packages()``, else the list of extra packages is empty. The
    extra files and packages are combined with the existing ones by
    ``merge_files()``.

    :returns: The tuple ``(packages, other_files)`` after merging.
    """
    extra_files = expand_patterns(additional_patterns)

    if not sort_packages:
        extra_packages = []
    else:
        extra_files, extra_packages = identify_packages(extra_files)

    other_files, packages = merge_files(
        extra_files, extra_packages, other_files, packages)
    return packages, other_files
예제 #3
0
파일: pack.py 프로젝트: ViDA-NYU/reprozip
def canonicalize_config(packages, other_files, additional_patterns,
                        sort_packages):
    """Folds the files matched by ``additional_patterns`` into the config.

    Nothing is done when ``additional_patterns`` is empty, or when expanding
    it yields no file; in both cases the arguments are returned as given.
    Otherwise the expanded files are optionally split by
    ``identify_packages()`` (if ``sort_packages`` is true) and combined with
    the existing files and packages by ``combine_files()``.

    :returns: The tuple ``(packages, other_files)``.
    """
    # No patterns configured: nothing to expand
    if not additional_patterns:
        return packages, other_files

    expanded = expand_patterns(additional_patterns)
    logger.info("Found %d files from expanding additional_patterns...",
                len(expanded))

    # Patterns matched nothing: leave the configuration as it is
    if not expanded:
        return packages, other_files

    extra_packages = []
    if sort_packages:
        expanded, extra_packages = identify_packages(expanded)

    other_files, packages = combine_files(expanded, extra_packages,
                                          other_files, packages)
    return packages, other_files
예제 #4
0
파일: pack.py 프로젝트: lcw/reprozip
def canonicalize_config(packages, other_files, additional_patterns,
                        sort_packages):
    """Adds the files matched by ``additional_patterns`` to the config.

    If ``additional_patterns`` is empty, or ``expand_patterns()`` finds no
    file for it, ``packages`` and ``other_files`` are returned unchanged.
    Otherwise the files found are sorted into packages by
    ``identify_packages()`` when ``sort_packages`` is true, then merged with
    the existing entries through ``combine_files()``.

    :returns: The tuple ``(packages, other_files)``.
    """
    found = expand_patterns(additional_patterns) if additional_patterns else []
    if additional_patterns:
        logger.info("Found %d files from expanding additional_patterns...",
                    len(found))

    # Either there were no patterns or they matched nothing
    if not found:
        return packages, other_files

    if sort_packages:
        found, found_packages = identify_packages(found)
    else:
        found_packages = []

    other_files, packages = combine_files(
        found, found_packages, other_files, packages)
    return packages, other_files
예제 #5
0
def write_configuration(directory,
                        sort_packages,
                        find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from ``directory``, builds one run description
    per top-level process recorded in it, and saves the result as
    ``config.yml`` in the same directory.

    :param directory: Path object for the trace directory; it must support
        the ``/`` operator and contain the ``trace.sqlite3`` database.
    :param sort_packages: If true, the traced files are passed through
        ``identify_packages()``; otherwise the package list is empty.
    :param find_inputs_outputs: If true, ``compile_inputs_outputs()`` is
        called on the runs; otherwise an empty dict is saved instead.
    :param overwrite: If true, or if ``config.yml`` does not exist yet, the
        list of runs is rebuilt from every top-level process. Otherwise the
        runs of the existing configuration are loaded, and only the
        top-level processes past the first ``len(runs)`` are appended.
    """
    database = directory / 'trace.sqlite3'

    assert database.is_file()
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    # The connection is not released automatically; make sure it gets closed
    # even if reading the trace or the previous configuration fails
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Writes configuration file
        config = directory / 'config.yml'
        distribution = [distro.id(), distro.version()]
        cur = conn.cursor()
        try:
            if overwrite or not config.exists():
                runs = []
                # This gets all the top-level processes (p.parent ISNULL) and
                # the first executed file for that process (sorting by ids,
                # which are chronological). The explicit ORDER BY keeps the
                # run numbering in process order, matching what the OFFSET
                # query below assumes.
                executions = cur.execute('''
                    SELECT e.name, e.argv, e.envp, e.workingdir,
                           p.timestamp, p.exit_timestamp, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id;
                    ''')
            else:
                # Loads in previous config; only its runs are reused here
                runs, _oldpkgs, _oldfiles = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)

                # Same query as previous block but skips the processes that
                # already have a run in the loaded configuration
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir,
                           p.timestamp, p.exit_timestamp, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id
                    LIMIT 2147483647 OFFSET ?;
                    ''', (len(runs), ))
            for (r_name, r_argv, r_envp, r_workingdir, r_start, r_end,
                 r_exitcode) in executions:
                # Decodes command-line (NUL-separated, possibly with a
                # trailing NUL)
                argv = r_argv.split('\0')
                if not argv[-1]:
                    argv = argv[:-1]

                # Decodes environment (NUL-separated NAME=value entries)
                envp = r_envp.split('\0')
                if not envp[-1]:
                    envp = envp[:-1]
                environ = dict(v.split('=', 1) for v in envp)

                run = {
                    'id': "run%d" % len(runs),
                    'binary': r_name,
                    'argv': argv,
                    'workingdir': str(Path(r_workingdir)),
                    'architecture': platform.machine().lower(),
                    'distribution': distribution,
                    'hostname': platform.node(),
                    'system': [platform.system(),
                               platform.release()],
                    'environ': environ,
                    'uid': os.getuid(),
                    'gid': os.getgid()
                }

                # Bit 0x0100 selects whether the low byte is recorded as a
                # signal or as an exit code
                if r_exitcode & 0x0100:
                    run['signal'] = r_exitcode & 0xFF
                else:
                    run['exitcode'] = r_exitcode & 0xFF

                if r_end is not None:
                    run['walltime'] = (r_end - r_start) / 1.0E9  # ns to s

                runs.append(run)
        finally:
            cur.close()
    finally:
        conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
예제 #6
0
def write_configuration(directory, sort_packages, find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from ``directory``, builds one run description
    per top-level process selected from it, and saves the result as
    ``config.yml`` in the same directory.

    :param directory: Path object for the trace directory; it must support
        the ``/`` operator.
    :param sort_packages: If true, the traced files are passed through
        ``identify_packages()``; otherwise the package list is empty.
    :param find_inputs_outputs: If true, ``compile_inputs_outputs()`` is
        called on the runs; otherwise an empty dict is saved instead.
    :param overwrite: If true, or if ``config.yml`` does not exist yet, the
        list of runs is rebuilt from every top-level process. Otherwise the
        existing configuration is loaded, only the last top-level process is
        appended to its runs, and the files and packages are merged with the
        previous ones using ``merge_files()``.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    # The connection is not released automatically; make sure it gets closed
    # even if reading the trace or the previous configuration fails
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Writes configuration file
        config = directory / 'config.yml'
        # NOTE(review): platform.linux_distribution() was removed in
        # Python 3.8; this only works on older interpreters
        distribution = platform.linux_distribution()[0:2]
        cur = conn.cursor()
        try:
            if overwrite or not config.exists():
                runs = []
                # This gets all the top-level processes (p.parent ISNULL) and
                # the first executed file for that process (sorting by ids,
                # which are chronological). The explicit ORDER BY keeps the
                # runs in process order instead of relying on an unspecified
                # row order.
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id;
                    ''')
            else:
                # Loads in previous config
                runs, oldpkgs, oldfiles = load_config(config,
                                                      canonical=False,
                                                      File=TracedFile)

                # Same query as previous block but only gets last process
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id DESC
                    LIMIT 1;
                    ''')
                # Only the last entries are kept, like the query above
                inputs = inputs[-1:]
                outputs = outputs[-1:]

                files, packages = merge_files(files, packages,
                                              oldfiles,
                                              oldpkgs)
            for r_name, r_argv, r_envp, r_workingdir, r_exitcode in executions:
                # Decodes command-line (NUL-separated, possibly with a
                # trailing NUL)
                argv = r_argv.split('\0')
                if not argv[-1]:
                    argv = argv[:-1]

                # Decodes environment (NUL-separated NAME=value entries)
                envp = r_envp.split('\0')
                if not envp[-1]:
                    envp = envp[:-1]
                environ = dict(v.split('=', 1) for v in envp)

                # Bit 0x0100 of the exit code selects whether the low byte is
                # recorded under 'signal' or under 'exitcode'
                runs.append({'binary': r_name, 'argv': argv,
                             'workingdir': unicode_(Path(r_workingdir)),
                             'architecture': platform.machine().lower(),
                             'distribution': distribution,
                             'hostname': platform.node(),
                             'system': [platform.system(),
                                        platform.release()],
                             'environ': environ,
                             'uid': os.getuid(),
                             'gid': os.getgid(),
                             'signal' if r_exitcode & 0x0100 else 'exitcode':
                                 r_exitcode & 0xFF})
        finally:
            cur.close()
    finally:
        conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
예제 #7
0
파일: trace.py 프로젝트: Aloma/reprozip
def _label_files(run_files, command_line_files):
    """Builds the ``{unique label: path string}`` dict for a run's files.

    A file that appears in ``command_line_files`` is labelled ``arg`` if it
    is the only such file in ``run_files``, or ``arg_<position>`` if there
    are several. Any other file is labelled with its ``unicodename``.
    Colliding labels get a ``_2``, ``_3``, ... suffix.
    """
    nb_on_cmdline = sum(1 for f in run_files if f in command_line_files)

    labelled = {}
    for file_ in run_files:
        # If file is on the command-line
        if file_ in command_line_files:
            if nb_on_cmdline > 1:
                label = "arg_%d" % command_line_files[file_]
            else:
                label = "arg"
        # Else, use file's name
        else:
            label = file_.unicodename
        # Make labels unique
        uniquelabel = label
        i = 1
        while uniquelabel in labelled:
            i += 1
            uniquelabel = '%s_%d' % (label, i)
        labelled[uniquelabel] = str(file_)
    # TODO : Note that right now, we keep the files that don't appear on the
    # command-line
    return labelled


def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from ``directory``, builds one run description
    (with labelled input and output files) per top-level process selected
    from it, and saves the result as ``config.yml`` in the same directory.

    :param directory: Path object for the trace directory; it must support
        the ``/`` operator.
    :param sort_packages: If true, the traced files are passed through
        ``identify_packages()``; otherwise the package list is empty.
    :param overwrite: If false and ``config.yml`` already exists, that
        configuration is loaded, only the last top-level process is appended
        to its runs, and the files and packages are merged with the previous
        ones using ``merge_files()``. Otherwise the list of runs is rebuilt
        from every top-level process.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    # The connection is not released automatically; make sure it gets closed
    # even if reading the trace or the previous configuration fails
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Makes sure all the directories used as working directories are
        # packed (they already do if files from them are used, but empty
        # directories do not get packed inside a tar archive)
        files.update(d for d in list_directories(conn) if d.path.is_dir())

        # Writes configuration file
        config = directory / 'config.yml'
        # NOTE(review): platform.linux_distribution() was removed in
        # Python 3.8; this only works on older interpreters
        distribution = platform.linux_distribution()[0:2]
        oldconfig = not overwrite and config.exists()
        cur = conn.cursor()
        try:
            if oldconfig:
                # Loads in previous config
                runs, oldpkgs, oldfiles, _patterns = load_config(
                    config,
                    canonical=False,
                    File=TracedFile)
                # Here, additional patterns are discarded

                # Gets the last top-level process (p.parent ISNULL) and the
                # first file it executed. The join goes through
                # executed_files.process: the ids of the two tables are
                # unrelated and must not be compared with each other.
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id DESC
                    LIMIT 1;
                    ''')
                # Only the last run is added: keep the matching entries of
                # both lists so the zip below pairs them with that run
                inputs = inputs[-1:]
                outputs = outputs[-1:]

                files, packages = merge_files(files, packages,
                                              oldfiles,
                                              oldpkgs)
            else:
                runs = []
                # Same query, for all the top-level processes in order
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id;
                    ''')
            for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
                    input_files, output_files) in izip(executions,
                                                       inputs, outputs):
                # Decodes command-line (NUL-separated, possibly with a
                # trailing NUL)
                argv = r_argv.split('\0')
                if not argv[-1]:
                    argv = argv[:-1]

                # Decodes environment (NUL-separated NAME=value entries)
                envp = r_envp.split('\0')
                if not envp[-1]:
                    envp = envp[:-1]
                environ = dict(v.split('=', 1) for v in envp)

                # Gets files from command-line, mapped to their position in
                # argv
                command_line_files = {}
                for pos, arg in enumerate(argv):
                    p = Path(r_workingdir, arg).resolve()
                    if p.is_file():
                        command_line_files[p] = pos

                # Labels input and output files
                input_files_dict = _label_files(input_files,
                                                command_line_files)
                output_files_dict = _label_files(output_files,
                                                 command_line_files)

                # Bit 0x0100 of the exit code selects whether the low byte is
                # recorded under 'signal' or under 'exitcode'
                runs.append({'binary': r_name, 'argv': argv,
                             'workingdir': Path(r_workingdir).path,
                             'architecture': platform.machine().lower(),
                             'distribution': distribution,
                             'hostname': platform.node(),
                             'system': [platform.system(),
                                        platform.release()],
                             'environ': environ,
                             'uid': os.getuid(),
                             'gid': os.getgid(),
                             'signal' if r_exitcode & 0x0100 else 'exitcode':
                                 r_exitcode & 0xFF,
                             'input_files': input_files_dict,
                             'output_files': output_files_dict})
        finally:
            cur.close()
    finally:
        conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
예제 #8
0
def _label_files(run_files, command_line_files):
    """Builds the ``{unique label: path string}`` dict for a run's files.

    A file that appears in ``command_line_files`` is labelled ``arg`` if it
    is the only such file in ``run_files``, or ``arg_<position>`` if there
    are several. Any other file is labelled with its ``unicodename``.
    Colliding labels get a ``_2``, ``_3``, ... suffix.
    """
    nb_on_cmdline = sum(1 for f in run_files if f in command_line_files)

    labelled = {}
    for file_ in run_files:
        # If file is on the command-line
        if file_ in command_line_files:
            if nb_on_cmdline > 1:
                label = "arg_%d" % command_line_files[file_]
            else:
                label = "arg"
        # Else, use file's name
        else:
            label = file_.unicodename
        # Make labels unique
        uniquelabel = label
        i = 1
        while uniquelabel in labelled:
            i += 1
            uniquelabel = '%s_%d' % (label, i)
        labelled[uniquelabel] = str(file_)
    # TODO : Note that right now, we keep the files that don't appear on the
    # command-line
    return labelled


def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from ``directory``, builds one run description
    (with labelled input and output files) per top-level process selected
    from it, and saves the result as ``config.yml`` in the same directory.

    :param directory: Path object for the trace directory; it must support
        the ``/`` operator.
    :param sort_packages: If true, the traced files are passed through
        ``identify_packages()``; otherwise the package list is empty.
    :param overwrite: If false and ``config.yml`` already exists, that
        configuration is loaded, only the last top-level process is appended
        to its runs, and the files and packages are merged with the previous
        ones using ``merge_files()``. Otherwise the list of runs is rebuilt
        from every top-level process.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    # The connection is not released automatically; make sure it gets closed
    # even if reading the trace or the previous configuration fails
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Makes sure all the directories used as working directories are
        # packed (they already do if files from them are used, but empty
        # directories do not get packed inside a tar archive)
        files.update(d for d in list_directories(conn) if d.path.is_dir())

        # Writes configuration file
        config = directory / 'config.yml'
        # NOTE(review): platform.linux_distribution() was removed in
        # Python 3.8; this only works on older interpreters
        distribution = platform.linux_distribution()[0:2]
        oldconfig = not overwrite and config.exists()
        cur = conn.cursor()
        try:
            if not oldconfig:
                runs = []
                # This gets all the top-level processes (p.parent ISNULL) and
                # the first executed file for that process (sorting by ids,
                # which are chronological). The rows are zipped positionally
                # with inputs/outputs below, so their order is made explicit
                # instead of relying on an unspecified row order.
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id;
                    ''')
            else:
                # Loads in previous config
                runs, oldpkgs, oldfiles, _patterns = load_config(
                    config,
                    canonical=False,
                    File=TracedFile)
                # Here, additional patterns are discarded

                # Same query as previous block but only gets last process
                executions = cur.execute(
                    '''
                    SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                    FROM processes p
                    JOIN executed_files e ON e.id=(
                        SELECT id FROM executed_files e2
                        WHERE e2.process=p.id
                        ORDER BY e2.id
                        LIMIT 1
                    )
                    WHERE p.parent ISNULL
                    ORDER BY p.id DESC
                    LIMIT 1;
                    ''')
                # Only the last entries are kept, like the query above
                inputs = inputs[-1:]
                outputs = outputs[-1:]

                files, packages = merge_files(files, packages,
                                              oldfiles,
                                              oldpkgs)
            for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
                    input_files, output_files) in izip(executions,
                                                       inputs, outputs):
                # Decodes command-line (NUL-separated, possibly with a
                # trailing NUL)
                argv = r_argv.split('\0')
                if not argv[-1]:
                    argv = argv[:-1]

                # Decodes environment (NUL-separated NAME=value entries)
                envp = r_envp.split('\0')
                if not envp[-1]:
                    envp = envp[:-1]
                environ = dict(v.split('=', 1) for v in envp)

                # Gets files from command-line, mapped to their position in
                # argv
                command_line_files = {}
                for pos, arg in enumerate(argv):
                    p = Path(r_workingdir, arg).resolve()
                    if p.is_file():
                        command_line_files[p] = pos

                # Labels input and output files
                input_files_dict = _label_files(input_files,
                                                command_line_files)
                output_files_dict = _label_files(output_files,
                                                 command_line_files)

                # Bit 0x0100 of the exit code selects whether the low byte is
                # recorded under 'signal' or under 'exitcode'
                runs.append({'binary': r_name, 'argv': argv,
                             'workingdir': Path(r_workingdir).path,
                             'architecture': platform.machine().lower(),
                             'distribution': distribution,
                             'hostname': platform.node(),
                             'system': [platform.system(),
                                        platform.release()],
                             'environ': environ,
                             'uid': os.getuid(),
                             'gid': os.getgid(),
                             'signal' if r_exitcode & 0x0100 else 'exitcode':
                                 r_exitcode & 0xFF,
                             'input_files': input_files_dict,
                             'output_files': output_files_dict})
        finally:
            cur.close()
    finally:
        conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")