def synthesis(prepare_res):
    # Build one csvimport job per selected file in the zip, optionally
    # chaining the resulting datasets together.
    opts = DotDict((k, v) for k, v in options.items() if k in a_csvimport.options)
    lst = prepare_res
    previous = datasets.previous
    msg = ProgressMsg(lst)
    with status('importing') as update:
        for fn, info, dsn in lst:
            update(msg.step('importing'))
            # fn is the extracted member; present it as "zipname:member".
            opts.filename = fn
            show_fn = '%s:%s' % (options.filename, info.filename)
            ds = build('csvimport', options=opts, previous=previous, caption='Import of ' + show_fn).dataset()
            previous = ds.link_to_here(dsn, filename=show_fn)
            if options.chaining == 'off':
                previous = datasets.previous
    # A lone import, or the last dataset in a chain, also gets the 'default' name.
    if (len(lst) == 1 or options.chaining != 'off') and dsn != 'default':
        ds.link_to_here('default', filename=show_fn)
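
# Usage sketch, not part of this module: one way a build script might invoke
# csvimport_zip and walk the imported chain. The archive name, the regex and
# the function name are made-up examples.
def example_import_zip(urd):
    imp = urd.build(
        'csvimport_zip',
        filename='data/logs.zip',  # hypothetical archive
        include_re=r'\.csv$',      # only import csv members
        chaining='by_filename',
    )
    # With chaining on, the 'default' dataset is the end of the chain,
    # so chain() visits every imported file.
    for ds in imp.dataset().chain():
        print(ds, sum(ds.lines))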
def synthesis():
    # XOR the per-dataset checksums together, making the total independent
    # of the order the datasets appear in the chain.
    checksum = 0
    jobs = datasets.source.chain(length=options.chain_length, stop_ds=datasets.stop)
    for src in jobs:
        data = build('dataset_checksum', columns=options.columns, sort=options.sort, source=src).load()
        checksum ^= data.sum
    print("Total: %016x" % (checksum,))
    return DotDict(sum=checksum, columns=data.columns, sort=options.sort, sources=jobs)
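
# Usage sketch, not part of this module: checksumming a whole import chain
# from a build script. 'imp' stands for some job that produced the chain;
# the function name is a made-up example.
def example_verify_chain(urd, imp):
    chk = urd.build(
        'dataset_checksum_chain',
        source=imp,       # dataset (chain) to checksum
        chain_length=-1,  # follow the chain all the way back
    )
    res = chk.load()      # the DotDict returned by synthesis above
    print('%016x over %d datasets' % (res.sum, len(res.sources)))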
def synthesis(prepare_res, analysis_res):
    separator, _, _, filename, _, labels, dw, bad_dw, skipped_dw, fds, success_fh, _ = prepare_res
    # Analysis may have gotten a perfectly legitimate EOF if something
    # went wrong in the reader process, so we need to check that all
    # went well.
    reader_res = b""
    try:
        success_fh.seek(0)
        reader_res = success_fh.read()
    except OSError:
        pass
    if reader_res != b"\0":
        reader_res = reader_res.decode("utf-8", "replace").strip("\r\n \t\0")
        raise Exception(reader_res or "Reader process failed")
    success_fh.close()
    os.unlink("reader.success")
    good_counts = []
    bad_counts = []
    skipped_counts = []
    for sliceno, (good_count, bad_count, skipped_count) in enumerate(analysis_res):
        dw.set_lines(sliceno, good_count)
        if bad_dw:
            bad_dw.set_lines(sliceno, bad_count)
        if skipped_dw:
            skipped_dw.set_lines(sliceno, skipped_count)
        good_counts.append(good_count)
        bad_counts.append(bad_count)
        skipped_counts.append(skipped_count)
    # Switch every writer that exists over to gzip compression.
    for writer in (bad_dw, skipped_dw, dw):
        if writer:
            writer.set_compressions("gzip")
    return DotDict(
        num_lines=sum(good_counts),
        lines_per_slice=good_counts,
        num_broken_lines=sum(bad_counts),
        broken_lines_per_slice=bad_counts,
        num_skipped_lines=sum(skipped_counts),
        skipped_lines_per_slice=skipped_counts,
    )
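
# Usage sketch, not part of this module: reading the counts that synthesis
# returns from a finished csvimport job. 'imp' and the function name are
# made-up examples.
def example_report(imp):
    res = imp.load()
    print('%d lines imported, %d broken, %d skipped' % (
        res.num_lines, res.num_broken_lines, res.num_skipped_lines,
    ))
    for sliceno, n in enumerate(res.lines_per_slice):
        print('slice %d: %d lines' % (sliceno, n))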
default is to include directories.
'''

from zipfile import ZipFile
from shutil import copyfileobj
from os.path import join
import re

from accelerator.compat import uni
from . import a_csvimport
from accelerator import DotDict, OptionEnum, ProgressMsg, build, status

depend_extra = (a_csvimport,)

options = DotDict(a_csvimport.options)
options.inside_filenames = {}  # {"filename in zip": "dataset name"} or empty to import all files
options.chaining = OptionEnum('off on by_filename by_dsname').on
options.include_re = ""  # Regex of files to include. (Matches anywhere, use ^$ as needed.)
options.exclude_re = ""  # Regex of files to exclude, takes priority over include.
options.strip_dirs = False  # Strip directories from filename (a/b/c -> c)

datasets = ('previous',)

def namefix(d, name):
    # Reduce the name to characters that are safe in a dataset name.
    ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._-'
    name = ''.join(c if c in ok else '_' for c in uni(name))
    if name == 'default' and options.chaining != 'off':
        name = 'default_'
    # Append '_' until the name is unique among the names already used.
    while name in d:
        name += '_'
    d.add(name)
    return name
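
# Behaviour sketch for namefix, shown as expected values rather than run
# code; these inputs are made-up examples.
#   namefix(set(), 'a dir/file-1.csv')  -> 'a_dir_file-1.csv'
#   namefix({'jan.csv'}, 'jan.csv')     -> 'jan.csv_'  (name already used)
#   namefix(set(), 'default')           -> 'default_'  (when chaining is on)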