Code example #1
def process_files_column(infiles, options):
    """Processes the given files in ``column`` mode.

    Files will be processed sequentially. The output is a single line for
    each file where column i contains the result of the aggregation function
    for the column of the file.
    """
    first = True
    for fname in infiles:
        # Only the first file should emit the header line
        process_files_column_single(open_anything(fname), options, first)
        first = False
Code example #2
File: groupby.py — Project: ntamas/swissknife
def process_file(infile, options):
    """Processes the given file in ``groupby`` mode.

    Each input line is split into columns; the first selected column becomes
    the key and the remaining selected columns are collected as its values.
    After the whole file is read, every key is printed on a single line
    together with all of its collected values.

    Parameters:
        infile: name of the input file (handed to ``open_anything``)
        options: parsed command-line options
    """
    # Convert the 1-based field numbers into 0-based column indices
    if options.fields:
        col_idxs = [f - 1 for f in options.fields]
    else:
        col_idxs = None

    # Dictionary to map keys to values; a set de-duplicates when --unique
    if options.unique:
        keys_to_values = defaultdict(set)
    else:
        keys_to_values = defaultdict(list)

    # Some caching to avoid costly attribute lookups in the loop
    # (the original also cached options.fields but never used it)
    delim = options.in_delimiter
    unique = options.unique
    join = options.out_delimiter.join

    # Strip surrounding whitespace too when --strip is given; otherwise
    # strip line terminators only
    chars_to_strip = " \t\r\n" if options.strip else "\r\n"

    for line in open_anything(infile):
        # Split the input line
        parts = line.strip(chars_to_strip).split(delim)

        # Select the relevant columns only
        if col_idxs:
            parts = sublist(parts, col_idxs)

        # If the row is empty, continue
        if not parts:
            continue

        # Store the row to its appropriate key (first column is the key)
        if unique:
            keys_to_values[parts[0]].update(parts[1:])
        else:
            keys_to_values[parts[0]].extend(parts[1:])

    # Print the key-value pairs
    for key, values in keys_to_values.items():
        print(join(chain([key], values)))
Code example #3
File: qplot.py — Project: ntamas/swissknife
def plot_file_on_figure(infile, figure, options):
    """Plots the dataset in the given file on the given figure."""
    table = TableWithHeaderIterator(
        open_anything(infile),
        delimiter=options.delimiter,
        every=options.every,
        fields=options.fields,
        strip=options.strip,
    )
    table.first_column_is_date = "x" in options.dates

    # Dispatch to the plotting routine matching the requested plot type
    plotter = globals()["plot_%s_from_table_iterator" % options.type]
    plotter(table, figure, options)

    # Add the title unless suppressed entirely
    if options.no_title:
        return
    if options.title is not None:
        figure.suptitle(options.title)
    elif infile != "-":
        # No explicit title: fall back to the file name (but not for stdin)
        figure.suptitle(infile)
Code example #4
File: remap.py — Project: ntamas/swissknife
def remap_file(infile, mapper, options):
    """Remaps the entries in the given file using the given callable mapper.

    Each line is split on ``options.delimiter``; columns whose 1-based index
    appears in ``options.fields`` are passed through *mapper*, the remaining
    columns are copied verbatim. Every surviving line is printed.

    The mapper may raise ``SkipColumnException`` to drop the current column
    or ``SkipRowException`` to drop the current row.

    Raises:
        UnknownIDError: if *mapper* raises ``KeyError`` for a value.
    """
    # Hoist invariants out of the per-line loop; using a set makes the
    # per-column membership test O(1) instead of O(len(fields))
    delimiter = options.delimiter
    fields = set(options.fields)

    for line in open_anything(infile):
        parts = line.strip().split(delimiter)
        new_parts = []
        skip = False
        for idx, part in enumerate(parts, 1):
            try:
                if idx in fields:
                    new_parts.append(mapper(part))
                else:
                    new_parts.append(part)
            except KeyError:
                # Translate the low-level lookup failure into a domain error;
                # the KeyError context adds no information, so suppress it
                raise UnknownIDError(part) from None
            except SkipColumnException:
                pass
            except SkipRowException:
                skip = True
                break
        if not skip:
            print(delimiter.join(new_parts))
Code example #5
File: remap.py — Project: ntamas/swissknife
def load_mapping(fname, options):
    """Loads a mapping from the given file and returns a dict-like
    object."""
    # Choose the backing dictionary based on how missing keys are handled
    action = options.missing_action
    if action == "fail":
        data = {}
    elif action == "warn":
        data = cautiousdict()
    elif action == "skip":
        data = skippingdict(exc=SkipRowException)
    elif action == "empty":
        data = skippingdict(exc=SkipColumnException)
    else:
        data = lenientdict()

    # Convert the 1-based field numbers to 0-based column indices
    old_col, new_col = (f - 1 for f in options.mapping_fields)
    sep = options.mapping_delimiter
    for row in open_anything(fname):
        cols = row.strip().split(sep)
        data[cols[old_col]] = cols[new_col]
    return data
Code example #6
def process_files_multiple(infiles, options):
    """Processes the given files in ``multiple`` mode.

    Files will be processed in parallel; row i of each file will be aggregated
    using the aggregation function into row i of the output.

    Parameters:
        infiles: list of input file names (handed to ``open_anything``)
        options: parsed command-line options
    """
    # Convert the 1-based field numbers into 0-based column indices
    if options.fields:
        col_idxs = [f - 1 for f in options.fields]
    else:
        col_idxs = None

    # Some caching to avoid costly attribute lookups in the loop
    # (the original also cached options.fields but never used it)
    delim = options.in_delimiter
    func = options.function
    join = options.out_delimiter.join

    # Flag to denote whether we have seen at least one row with numbers.
    # If not, we are still processing the headers.
    data_started = False

    for lines in zip(*[open_anything(f) for f in infiles]):
        # Split each file's current line into columns
        lines = [line.strip().split(delim) for line in lines]

        # Select the relevant columns only
        if col_idxs:
            lines = [sublist(line, col_idxs) for line in lines]

        if not data_started:
            # Check whether this row contains numbers only (at least in the
            # columns we are interested in)
            if any(not only_numbers(line) for line in lines):
                # This is a header, print it from the first file, assuming
                # that the remaining files contain the same header
                if hasattr(func, "argout"):
                    # The aggregation function yields multiple outputs per
                    # input column, so expand each header name accordingly
                    headers = []
                    for header in lines[0]:
                        headers.extend(
                            "%s_%s" % (header, arg) if arg else header
                            for arg in func.argout
                        )
                    print(join(headers))
                else:
                    print(join(lines[0]))
                continue
            else:
                # Yay, finally real data!
                data_started = True

        # Convert the columns of interest to floats
        lines = [[float(x) for x in line] for line in lines]

        # Aggregate column i of every file into column i of the output row
        row = []
        for items in zip(*lines):
            result = func(items)
            if hasattr(result, "__iter__"):
                # Multi-valued aggregation result: emit one column per value
                row.extend(str(item) for item in result)
            else:
                row.append(str(result))
        print(join(row))