Python izip Examples

Programming Language: Python

Namespace/Package Name: reprozip.utils

Method/Function: izip

Examples at hotexamples.com: 4

Python izip - 4 examples found. These are the top-rated real-world Python examples of reprozip.utils.izip, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: trace.py Project: prashant182/reprozip

def compile_inputs_outputs(runs, inputs, outputs):
    """Names every input/output file and wraps each in an InputOutputFile.

    A file that shows up exactly once on the runs' command lines is named
    ``arg``, followed by the run number (when there are several runs) and/or
    the argument number (when that run has several file arguments). A file
    that shows up more than once is named ``arg_<unicodename>``, and a file
    that never shows up on a command line is named ``<unicodename>``. Every
    name is passed through a ``UniqueNames()`` callable before being used as
    a key of the returned dictionary.
    """
    # {path: (run_nb, arg_nb)}, or None once the path was seen more than once
    cmdline_location = {}
    # Number of file arguments of each run, indexed by run number
    file_args_per_run = []
    # {path: [run numbers]}
    readers = {}
    writers = {}

    for run_nb, (run, in_files, out_files) in enumerate(
            izip(runs, inputs, outputs)):
        # Remember which runs read and which runs write each file
        for path in in_files:
            readers.setdefault(path, []).append(run_nb)
        for path in out_files:
            writers.setdefault(path, []).append(run_nb)

        # Find the files that are named on this run's command line
        run_files = set(in_files) | set(out_files)
        matched = 0
        for arg_nb, arg in enumerate(run['argv']):
            path = Path(run['workingdir'], arg).resolve()
            if path not in run_files:
                continue
            matched += 1
            if path in cmdline_location:
                # Already seen on a command line: position is ambiguous
                cmdline_location[path] = None
            else:
                cmdline_location[path] = run_nb, arg_nb
        file_args_per_run.append(matched)

    file_names = {}
    make_unique = UniqueNames()

    for path in flatten(2, (inputs, outputs)):
        # Each file is only named once
        if path in file_names:
            continue

        if path not in cmdline_location:
            # Never on a command line: use the file's own name
            wanted = path.unicodename
        else:
            location = cmdline_location[path]
            if location is None:
                # On the command lines more than once
                wanted = 'arg_%s' % path.unicodename
            else:
                # On the command lines exactly once: name it by position
                run_nb, arg_nb = location
                parts = []
                # Run number, only if there is more than one run
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, only if the run has several file arguments
                if file_args_per_run[run_nb] > 1:
                    parts.append(arg_nb)
                wanted = 'arg%s' % '_'.join('%s' % s for s in parts)
        file_names[path] = make_unique(wanted)

    result = {}
    for path, name in iteritems(file_names):
        result[name] = InputOutputFile(path,
                                       readers.get(path, []),
                                       writers.get(path, []))
    return result

Example #2

Show file

File: trace.py Project: hugobowne/reprozip

def compile_inputs_outputs(runs, inputs, outputs):
    """Builds the ``{name: InputOutputFile}`` mapping for all traced files.

    Names are derived from where a file appears on the runs' command lines:
    ``arg`` plus run/argument numbers for a file seen exactly once,
    ``arg_<unicodename>`` for a file seen several times, and plain
    ``<unicodename>`` otherwise. Each candidate name is passed through a
    ``UniqueNames()`` callable.
    """
    # {path: [run numbers]}
    readers, writers = {}, {}
    # {path: (run_nb, arg_nb)}; the value becomes None on a second sighting
    arg_position = {}
    # file_arg_counts[run_nb] is the number of file arguments of that run
    file_arg_counts = []

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # Record the runs reading and the runs writing each file
        for f in in_files:
            readers.setdefault(f, []).append(run_nb)
        for f in out_files:
            writers.setdefault(f, []).append(run_nb)

        # Match the resolved command-line arguments against this run's files
        touched = set(in_files).union(out_files)
        hits = 0
        for arg_nb, arg in enumerate(run['argv']):
            candidate = Path(run['workingdir'], arg).resolve()
            if candidate in touched:
                hits += 1
                # First sighting stores the position, any later one voids it
                arg_position[candidate] = (
                    None if candidate in arg_position else (run_nb, arg_nb))
        file_arg_counts.append(hits)

    names = {}
    unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in names:
            continue

        if fi not in arg_position:
            # Not on any command line
            names[fi] = unique(fi.unicodename)
        elif arg_position[fi] is None:
            # On the command lines, but more than once
            names[fi] = unique('arg_%s' % fi.unicodename)
        else:
            run_nb, arg_nb = arg_position[fi]
            suffix = []
            # The run number is only useful when there are several runs
            if len(runs) > 1:
                suffix.append(run_nb)
            # Same for the argument number and several file arguments
            if file_arg_counts[run_nb] > 1:
                suffix.append(arg_nb)
            names[fi] = unique('arg%s' % '_'.join('%s' % s for s in suffix))

    return {name: InputOutputFile(fi, readers.get(fi, []),
                                  writers.get(fi, []))
            for fi, name in iteritems(names)}

Example #3

Show file

File: trace.py Project: Aloma/reprozip

def _split_nul(blob):
    """Splits a NUL-separated string, dropping one trailing empty item.
    """
    items = blob.split('\0')
    if not items[-1]:
        items = items[:-1]
    return items


def _label_files(files, command_line_files):
    """Builds the ``{label: path}`` dictionary for one run's files.

    :param files: paths to label (objects with a ``unicodename`` attribute);
        iterated twice, so it must not be a one-shot iterator.
    :param command_line_files: dict mapping the paths found on the command
        line to their argument index.
    :returns: dict mapping each unique label to ``str(path)``.
    """
    on_cmdline = sum(1 for f in files if f in command_line_files)

    labelled = {}
    for f in files:
        if f in command_line_files:
            # Only number the arguments when several of them are files
            if on_cmdline > 1:
                label = "arg_%d" % command_line_files[f]
            else:
                label = "arg"
        else:
            # Not on the command line: use the file's name
            label = f.unicodename
        # Make labels unique by appending _2, _3, ...
        uniquelabel = label
        i = 1
        while uniquelabel in labelled:
            i += 1
            uniquelabel = '%s_%d' % (label, i)
        labelled[uniquelabel] = str(f)
    # TODO : Note that right now, we keep the files that don't appear on the
    # command-line
    return labelled


def _build_run(execution, input_files, output_files, distribution):
    """Builds the configuration dictionary describing a single run.

    :param execution: row ``(name, argv, envp, workingdir, exitcode)`` where
        argv and envp are NUL-separated strings.
    :param input_files: input files of that run.
    :param output_files: output files of that run.
    :param distribution: value stored under the 'distribution' key.
    """
    r_name, r_argv, r_envp, r_workingdir, r_exitcode = execution

    # Decodes command-line and environment
    argv = _split_nul(r_argv)
    environ = dict(v.split('=', 1) for v in _split_nul(r_envp))

    # Gets files from command-line: {resolved path: argument index}
    command_line_files = {}
    for i, arg in enumerate(argv):
        p = Path(r_workingdir, arg).resolve()
        if p.is_file():
            command_line_files[p] = i

    return {'binary': r_name, 'argv': argv,
            'workingdir': Path(r_workingdir).path,
            'architecture': platform.machine().lower(),
            'distribution': distribution,
            'hostname': platform.node(),
            'system': [platform.system(), platform.release()],
            'environ': environ,
            'uid': os.getuid(),
            'gid': os.getgid(),
            # Bit 0x0100 selects the 'signal' key instead of 'exitcode'
            'signal' if r_exitcode & 0x0100 else 'exitcode':
                r_exitcode & 0xFF,
            'input_files': _label_files(input_files, command_line_files),
            'output_files': _label_files(output_files, command_line_files)}


def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from `directory`, builds one run entry per
    execution returned by the query and saves ``config.yml`` in the same
    directory. If a configuration already exists and `overwrite` is false,
    it is loaded, its files and packages are merged with the new ones, and
    only the last traced execution is appended to its runs.

    :param directory: path object of the trace directory (must support
        ``/``).
    :param sort_packages: if true, files are passed through
        identify_packages(); otherwise the package list is empty.
    :param overwrite: if true, an existing ``config.yml`` is ignored.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    # From here on, the connection is closed even if something raises
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Makes sure all the directories used as working directories are
        # packed (they already do if files from them are used, but empty
        # directories do not get packed inside a tar archive)
        files.update(d for d in list_directories(conn) if d.path.is_dir())

        # Writes configuration file
        config = directory / 'config.yml'
        # NOTE: platform.linux_distribution() was removed in Python 3.8
        distribution = platform.linux_distribution()[0:2]
        oldconfig = not overwrite and config.exists()
        cur = conn.cursor()
        try:
            # NOTE(review): both queries join processes on p.id=e.id, i.e.
            # on the executed file's own id -- confirm against the schema
            if oldconfig:
                # Loads in previous config
                runs, oldpkgs, oldfiles, patterns = load_config(
                    config,
                    canonical=False,
                    File=TracedFile)
                # Here, additional patterns are discarded

                executions = cur.execute(
                        '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
                # Only the last run is new: keep its files only, for both
                # inputs and outputs so they stay aligned with the query
                inputs = inputs[-1:]
                outputs = outputs[-1:]

                files, packages = merge_files(files, packages,
                                              oldfiles,
                                              oldpkgs)
            else:
                runs = []
                executions = cur.execute(
                        '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM executed_files e
                INNER JOIN processes p on p.id=e.id
                WHERE p.parent ISNULL
                ORDER BY p.id;
                ''')

            # The cursor is consumed lazily, so this has to happen before it
            # gets closed
            for execution, input_files, output_files in izip(executions,
                                                             inputs,
                                                             outputs):
                runs.append(_build_run(execution, input_files, output_files,
                                       distribution))
        finally:
            cur.close()
    finally:
        conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")

Example #4

Show file

def _split_nul(blob):
    """Splits a NUL-separated string, dropping one trailing empty item.
    """
    items = blob.split('\0')
    if not items[-1]:
        items = items[:-1]
    return items


def _label_files(files, command_line_files):
    """Builds the ``{label: path}`` dictionary for one run's files.

    :param files: paths to label (objects with a ``unicodename`` attribute);
        iterated twice, so it must not be a one-shot iterator.
    :param command_line_files: dict mapping the paths found on the command
        line to their argument index.
    :returns: dict mapping each unique label to ``str(path)``.
    """
    on_cmdline = sum(1 for f in files if f in command_line_files)

    labelled = {}
    for f in files:
        if f in command_line_files:
            # Only number the arguments when several of them are files
            if on_cmdline > 1:
                label = "arg_%d" % command_line_files[f]
            else:
                label = "arg"
        else:
            # Not on the command line: use the file's name
            label = f.unicodename
        # Make labels unique by appending _2, _3, ...
        uniquelabel = label
        i = 1
        while uniquelabel in labelled:
            i += 1
            uniquelabel = '%s_%d' % (label, i)
        labelled[uniquelabel] = str(f)
    # TODO : Note that right now, we keep the files that don't appear on the
    # command-line
    return labelled


def _build_run(execution, input_files, output_files, distribution):
    """Builds the configuration dictionary describing a single run.

    :param execution: row ``(name, argv, envp, workingdir, exitcode)`` where
        argv and envp are NUL-separated strings.
    :param input_files: input files of that run.
    :param output_files: output files of that run.
    :param distribution: value stored under the 'distribution' key.
    """
    r_name, r_argv, r_envp, r_workingdir, r_exitcode = execution

    # Decodes command-line and environment
    argv = _split_nul(r_argv)
    environ = dict(v.split('=', 1) for v in _split_nul(r_envp))

    # Gets files from command-line: {resolved path: argument index}
    command_line_files = {}
    for i, arg in enumerate(argv):
        p = Path(r_workingdir, arg).resolve()
        if p.is_file():
            command_line_files[p] = i

    return {'binary': r_name, 'argv': argv,
            'workingdir': Path(r_workingdir).path,
            'architecture': platform.machine().lower(),
            'distribution': distribution,
            'hostname': platform.node(),
            'system': [platform.system(), platform.release()],
            'environ': environ,
            'uid': os.getuid(),
            'gid': os.getgid(),
            # Bit 0x0100 selects the 'signal' key instead of 'exitcode'
            'signal' if r_exitcode & 0x0100 else 'exitcode':
                r_exitcode & 0xFF,
            'input_files': _label_files(input_files, command_line_files),
            'output_files': _label_files(output_files, command_line_files)}


def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads ``trace.sqlite3`` from `directory`, builds one run entry per
    top-level process returned by the query and saves ``config.yml`` in the
    same directory. If a configuration already exists and `overwrite` is
    false, it is loaded, its files and packages are merged with the new
    ones, and only the last traced process is appended to its runs.

    :param directory: path object of the trace directory (must support
        ``/``).
    :param sort_packages: if true, files are passed through
        identify_packages(); otherwise the package list is empty.
    :param overwrite: if true, an existing ``config.yml`` is ignored.
    """
    database = directory / 'trace.sqlite3'

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    # From here on, the connection is closed even if something raises
    try:
        conn.row_factory = sqlite3.Row

        # Reads info from database
        files, inputs, outputs = get_files(conn)

        # Identifies which file comes from which package
        if sort_packages:
            files, packages = identify_packages(files)
        else:
            packages = []

        # Makes sure all the directories used as working directories are
        # packed (they already do if files from them are used, but empty
        # directories do not get packed inside a tar archive)
        files.update(d for d in list_directories(conn) if d.path.is_dir())

        # Writes configuration file
        config = directory / 'config.yml'
        # NOTE: platform.linux_distribution() was removed in Python 3.8
        distribution = platform.linux_distribution()[0:2]
        oldconfig = not overwrite and config.exists()
        cur = conn.cursor()
        try:
            if not oldconfig:
                runs = []
                # This gets all the top-level processes (p.parent ISNULL) and
                # the first executed file for that process (sorting by ids,
                # which are chronological)
                # NOTE(review): the outer SELECT has no ORDER BY, so the row
                # order presumably relies on SQLite's scan order matching
                # the order of inputs/outputs -- confirm
                executions = cur.execute(
                        '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL;
                ''')
            else:
                # Loads in previous config
                runs, oldpkgs, oldfiles, patterns = load_config(
                    config,
                    canonical=False,
                    File=TracedFile)
                # Here, additional patterns are discarded

                # Same query as previous block but only gets last process
                executions = cur.execute(
                        '''
                SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
                FROM processes p
                JOIN executed_files e ON e.id=(
                    SELECT id FROM executed_files e2
                    WHERE e2.process=p.id
                    ORDER BY e2.id
                    LIMIT 1
                )
                WHERE p.parent ISNULL
                ORDER BY p.id DESC
                LIMIT 1;
                ''')
                # Only the last run is new: keep its files only
                inputs = inputs[-1:]
                outputs = outputs[-1:]

                files, packages = merge_files(files, packages,
                                              oldfiles,
                                              oldpkgs)

            # The cursor is consumed lazily, so this has to happen before it
            # gets closed
            for execution, input_files, output_files in izip(executions,
                                                             inputs,
                                                             outputs):
                runs.append(_build_run(execution, input_files, output_files,
                                       distribution))
        finally:
            cur.close()
    finally:
        conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")