def urlcache(url, days=1, text=True, headers={}):
    'Return Path object to local cache of url contents.'
    p = Path(os.path.join(options.visidata_dir, 'cache', urllib.parse.quote(url, safe='')))
    if p.exists():
        secs = time.time() - modtime(p)
        if secs < days*24*60*60:
            return p

    if not p.parent.exists():
        os.makedirs(p.parent, exist_ok=True)

    req = Request(url)
    for k, v in headers.items():
        req.add_header(k, v)

    with urlopen(req) as fp:
        ret = fp.read()
        if text:
            ret = ret.decode('utf-8').strip()
            with p.open_text(mode='w') as fpout:
                fpout.write(ret)
        else:
            with p.open_bytes(mode='w') as fpout:
                fpout.write(ret)

    return p

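# Usage sketch for urlcache (the URL and header below are hypothetical): the
# first call downloads and writes the cache file; later calls within the
# 'days' window return the cached Path without touching the network.
#
#     p = urlcache('https://example.com/data.json', days=7,
#                  headers={'Accept': 'application/json'})
#     data = p.read_text()
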
def save_tabulate(sheet, givenpath, confirm_overwrite=True):
    'Save sheet to givenpath in the table format named by options.tabulate_format.'
    p = Path(givenpath)
    if p.exists():
        if confirm_overwrite:
            confirm(f"{givenpath} already exists. overwrite? ")

    _save_table(sheet, options.tabulate_format, p)

def test_s3_download(tmp_path, s3_setup, path_info):
    """Make sure that we can download files and nothing gets lost along the way."""
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}")
    sheet.download(sheet.rows, Path(tmp_path))
    vd.sync()
    assert {path_info.base_filename, path_info.gzip_filename} <= set(
        f.name for f in Path(tmp_path).glob("**/*")
    )

def moveFile(self, row, val):
    'Move the file for row into directory val, creating the destination directory if needed.'
    fn = row.name + row.ext
    newpath = os.path.join(val, fn)
    if not newpath.startswith('/'):
        newpath = os.path.join(self.source.resolve(), newpath)

    parent = Path(newpath).parent
    if parent.exists():
        if not parent.is_dir():
            error('destination %s not a directory' % parent)
    else:
        with contextlib.suppress(FileExistsError):
            os.makedirs(parent.resolve())

    os.rename(row.resolve(), newpath)
    row.fqpn = newpath
    self.restat(row)

def test_s3_roundtrip(tmp_path, s3_setup, path_info):
    """
    Upload a sample file to our mock S3 server, then confirm that a VisiData
    round trip brings back the same data.
    """
    out = tmp_path / "sample.json"
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}/{path_info.s3_key}")
    vd.save_json(Path(out), sheet)
    with open(path_info.local_file, "r") as f1, open(out, "r") as f2:
        assert json.load(f1) == json.load(f2)

def test_local_roundtrip(tmp_path, path_info):
    """
    Be sure that a round trip of our sample JSON file works as expected
    before getting S3 into the mix.
    """
    out = tmp_path / "sample.json"
    sheet = load_vd_sheet(path_info.local_file)
    vd.save_json(Path(out), sheet)
    with open(path_info.local_file, "r") as f1, open(out, "r") as f2:
        assert json.load(f1) == json.load(f2)

def test_s3_gzip_roundtrip(tmp_path, s3_setup, path_info):
    """
    Zip and then upload a sample file to our mock S3 server. Confirm that a
    VisiData round trip handles the decompression and outputs the same data.
    """
    out = tmp_path / "sample.json"
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}/{path_info.s3_key}.gz")
    vd.save_json(Path(out), sheet)
    with open(path_info.local_file, "r") as f1, open(out, "r") as f2:
        assert json.load(f1) == json.load(f2)

def urlcache(url, cachesecs=24*60*60):
    'Returns Path object to local cache of url contents.'
    p = Path(os.path.join(options.visidata_dir, 'cache', urllib.parse.quote(url, safe='')))
    if p.exists():
        secs = time.time() - p.stat().st_mtime
        if secs < cachesecs:
            return p

    if not p.parent.exists():
        os.makedirs(p.parent.resolve(), exist_ok=True)
    assert p.parent.is_dir(), p.parent

    req = urllib.request.Request(url, headers={'User-Agent': __version_info__})
    with urllib.request.urlopen(req) as fp:
        ret = fp.read().decode('utf-8').strip()
        with p.open_text(mode='w') as fpout:
            fpout.write(ret)

    return p

def _walkfiles(p):
    'Walk the tree under p, yielding a Path for each directory and each file within it.'
    basepath = str(p)
    for folder, subdirs, files in os.walk(basepath):
        subfolder = folder[len(basepath)+1:]
        if subfolder in ['.', '..']:
            continue

        fpath = Path(folder)
        yield fpath

        for fn in files:
            yield fpath/fn

def eval_vd(logpath, *args, **kwargs):
    'Instantiate logpath with args/kwargs replaced and replay all commands.'
    log = logpath.read_text()
    if args or kwargs:
        log = log.format(*args, **kwargs)

    src = Path(logpath.given, fp=io.StringIO(log), filesize=len(log))
    vs = openSource(src, filetype=src.ext)
    vs.name += '_vd'
    vd.push(vs)
    vs.vd = vd
    return vs

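# A minimal replay sketch, assuming a hypothetical saved cmdlog 'query.vd'
# containing str.format-style '{0}' placeholders; the positional args fill
# the placeholders before the cmdlog sheet is opened for replay:
#
#     vs = eval_vd(Path('query.vd'), 'transactions.csv')
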
def save(self, vs, filetype):
    'Copy rows to the system clipboard.'

    # use NTF to generate filename and delete file on context exit
    with tempfile.NamedTemporaryFile(suffix='.'+filetype) as temp:
        vd.sync(saveSheets(Path(temp.name), vs))
        p = subprocess.Popen(
            self.get_command('copy'),
            stdin=open(temp.name, 'r', encoding=options.encoding),
            stdout=subprocess.DEVNULL,
            close_fds=True)
        p.communicate()

def reload(self):
    self.toBeDeleted = []
    self.rows = []
    basepath = self.source.resolve()
    for folder, subdirs, files in os.walk(basepath):
        subfolder = folder[len(basepath)+1:]
        if subfolder.startswith('.'):
            continue

        for fn in files:
            if fn.startswith('.'):
                continue
            p = Path(os.path.join(folder, fn))
            self.rows.append(p)

    # sort by modtime initially
    self.rows.sort(key=lambda row: row.stat().st_mtime, reverse=True)

def reload(self):
    from pkg_resources import resource_filename
    cmdlist = TsvSheet('cmdlist', source=Path(resource_filename(__name__, 'commands.tsv')))
    cmdlist.reload_sync()
    self.cmddict = {}
    for cmdrow in cmdlist.rows:
        self.cmddict[(cmdrow.sheet, cmdrow.longname)] = cmdrow

    # [longname] -> keystrokes; keep the first binding seen for each longname
    self.revbinds = {}
    for (keystrokes, _), longname in bindkeys.iter(self.source):
        if longname not in self.revbinds:
            self.revbinds[longname] = keystrokes

    self.rows = []
    for (k, o), v in commands.iter(self.source):
        self.addRow(v)
        v.sheet = o

def openurl_http(path, filetype=None):
    import requests

    response = requests.get(path.given, stream=True)

    # if filetype not given, auto-detect with hacky mime-type parse
    if not filetype:
        contenttype = response.headers['content-type']
        subtype = contenttype.split(';')[0].split('/')[-1]
        filetype = content_filetypes.get(subtype, subtype)

    # If no charset is provided by response headers, use the user-specified
    # encoding option (which defaults to UTF-8) and hope for the best. The
    # alternative is an error because iter_lines() will produce bytes. We're
    # streaming so can't use response.apparent_encoding.
    if not response.encoding:
        response.encoding = options.encoding

    # create resettable iterator over contents
    fp = RepeatFile(iter_lines=response.iter_lines(decode_unicode=True))

    # call open_<filetype> with a usable Path
    return openSource(Path(path.given, fp=fp), filetype=filetype)

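# Quick illustration of the mime-type parse above (the header value is made
# up): 'text/csv; charset=utf-8' yields subtype 'csv', which content_filetypes
# may then remap to a specific loader name.
#
#     contenttype = 'text/csv; charset=utf-8'
#     subtype = contenttype.split(';')[0].split('/')[-1]   # -> 'csv'
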
def main_vd():
    'Open the given sources using the VisiData interface.'
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('inputs', nargs='*', help='initial sources')
    parser.add_argument('-f', dest='filetype', default='', help='uses loader for filetype instead of file extension')
    parser.add_argument('-y', dest='confirm_overwrite', default=None, action='store_false', help='overwrites existing files without confirmation')
    parser.add_argument('-p', '--play', dest='play', default=None, help='replays a saved .vd file within the interface')
    parser.add_argument('-P', dest='preplay', action='append', default=[], help='VisiData command to preplay before cmdlog replay')
    parser.add_argument('-b', '--batch', dest='batch', action='store_true', default=False, help='replays in batch mode (with no interface and all status sent to stdout)')
    parser.add_argument('-o', '--output', dest='output', default=None, help='saves the final visible sheet to output at the end of replay')
    parser.add_argument('-w', dest='replay_wait', default=0, help='time to wait between replayed commands, in seconds')
    parser.add_argument('-d', dest='delimiter', help='delimiter to use for tsv/usv filetype')
    parser.add_argument('--diff', dest='diff', default=None, help='show diffs from all sheets against this source')
    parser.add_argument('-v', '--version', action='version', version=__version_info__)

    args = vd.parseArgs(parser)

    # fetch motd and plugins *after* options parsing/setting
    visidata.PluginsSheet().reload()
    domotd()

    locale.setlocale(locale.LC_ALL, '')

    flPipedInput = not sys.stdin.isatty()
    flPipedOutput = not sys.stdout.isatty()

    vd._stdin, vd._stdout = duptty()  # always dup stdin/stdout

    stdinSource = Path('-', fp=vd._stdin)

    # parse args, including +sheetname:subsheet:4:3 starting at row:col on sheetname:subsheet[:...]
    start_positions = []  # (list_of_sheetstr, str, str)  # empty sheetstr means all sheets
    startsheets, startrow, startcol = [], None, None
    fmtargs = []
    fmtkwargs = {}
    inputs = []
    for arg in args.inputs:
        if arg.startswith('+'):  # position cursor at start
            if ':' in arg:
                pos = arg[1:].split(':')
                if len(pos) == 1:
                    startsheet = [Path(inputs[-1]).name] if inputs else None
                    start_positions.append((startsheet, pos[0], None))
                elif len(pos) == 2:
                    startsheet = [Path(inputs[-1]).name] if inputs else None
                    startrow, startcol = pos
                    start_positions.append((None, startrow, startcol))
                elif len(pos) >= 3:
                    startsheets = pos[:-2]
                    startrow, startcol = pos[-2:]
                    start_positions.append((startsheets, startrow, startcol))
            else:
                start_positions.append((None, arg[1:], None))

        elif args.play and '=' in arg:
            # parse 'key=value' pairs for formatting cmdlog template in replay mode
            k, v = arg.split('=')
            fmtkwargs[k] = v
        elif arg == '-':
            inputs.append(stdinSource)
        else:
            inputs.append(arg)
            fmtargs.append(arg)

    if args.diff:
        vs = openSource(args.diff)
        vd.push(vs)
        vs.reload()
        setDiffSheet(vs)

    if args.batch:
        options.undo = False
        vd.status = lambda *args, **kwargs: print(*args, file=sys.stderr)  # ignore kwargs (like priority)
        vd.editline = lambda *args, **kwargs: ''
        vd.execAsync = lambda func, *args, **kwargs: func(*args, **kwargs)  # disable async

    for cmd in args.preplay:
        Sheet('').exec_keystrokes(cmd)

    if not args.play:
        if flPipedInput and not inputs:  # '|vd' without explicit '-'
            inputs.append(stdinSource)

    sources = []
    for src in inputs:
        vs = openSource(src)
        vd.cmdlog.openHook(vs, src)
        sources.append(vs)

    vd.sheets.extend(sources)  # purposefully do not load everything

    if not vd.sheets and not args.play and not args.batch:
        vd.push(vd.vdmenu)

    if not args.play:
        if args.batch:
            vd.push(sources[0])
            sources[0].reload()

        for startsheets, startrow, startcol in start_positions:
            sheets = []  # sheets to apply startrow:startcol to
            if not startsheets:
                sheets = sources  # apply row/col to all sheets
            else:
                vs = vd.getSheet(startsheets[0]) or sources[-1]
                vd.sync(vs.ensureLoaded())
                vd.clearCaches()
                for startsheet in startsheets[1:]:
                    rowidx = vs.getRowIndexFromStr(startsheet)
                    if rowidx is None:
                        vs = None
                        vd.warning(f'no sheet "{startsheet}"')
                        break
                    vs = vs.rows[rowidx]
                    vd.sync(vs.ensureLoaded())
                    vd.clearCaches()
                if vs:
                    vd.push(vs)
                    sheets = [vs]

            if startrow:
                for vs in sheets:
                    if vs:
                        vs.moveToRow(startrow) or vd.warning(f'{vs} has no row "{startrow}"')

            if startcol:
                for vs in sheets:
                    if vs:
                        vs.moveToCol(startcol) or vd.warning(f'{vs} has no column "{startcol}"')

        if not args.batch:
            run(vd.sheets[0])
    else:
        if args.play == '-':
            vdfile = stdinSource
            vdfile.name = 'stdin.vd'
        else:
            vdfile = Path(args.play)

        vs = eval_vd(vdfile, *fmtargs, **fmtkwargs)
        vd.sync(vs.reload())
        if args.batch:
            if vd.replay_sync(vs):  # error
                return 1
        else:
            vd.replay(vs)
            run()

    if vd.sheets and (flPipedOutput or args.output):
        outpath = Path(args.output or '-')
        saveSheets(outpath, vd.sheets[0], confirm_overwrite=False)
        vd.sync()

    vd._stdout.flush()

    return 0

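# Examples of the '+' cursor-positioning syntax parsed above (filenames and
# sheet names are hypothetical; rows and columns may be given by index or name):
#
#     vd data.csv +10            # move to row 10
#     vd data.csv +10:3          # move to row 10, column 3
#     vd data.csv +data:10:3     # row 10, column 3 on sheet 'data'
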
def currentDirSheet(p):
    'Support opening the current DirSheet from the vdmenu'
    return DirSheet('.', source=Path('.'))

def iterload(self):
    'Yield a Path for each filename listed in the source, one per line.'
    for fn in self.source.open_text():
        yield Path(fn.rstrip())

def reload(self):
    from fecfile import fecparser

    self.rows = []

    row_dict = {}
    itemization_subsheets = {}

    def addSheetRow(component_name):
        "On first encountering a component, add a row to the filing sheet"
        cls = COMPONENT_SHEET_CLASSES[component_name]
        source_cls = list if cls in [FECItemizationSheet, TextSheet] else dict
        vs = cls(
            joinSheetnames(self.name, component_name),
            component_name=component_name,
            source=source_cls(),
            size=0,
        )
        vs.reload()
        row_dict[component_name] = vs
        self.addRow(vs)

    src = Path(self.source.resolve())
    item_iter = fecparser.iter_lines(src, {"as_strings": True})

    for item in item_iter:
        dtype = item.data_type
        if dtype not in row_dict.keys():
            addSheetRow(dtype)

        sheet_row = row_dict[dtype]

        if dtype in ["header", "summary"]:
            sheet_row.source = item.data
            sheet_row.reload()
        elif dtype == "text":
            if len(sheet_row.source) == 0:
                sheet_row.set_columns_from_row(item.data)
            sheet_row.source.append(item.data)
            sheet_row.addRow(item.data)
            sheet_row.size += 1
        elif dtype == "F99_text":
            sheet_row.source = item.data.split("\n")
            sheet_row.size = len(sheet_row.source)
        elif dtype == "itemization":
            # group itemizations into one subsheet per schedule/form type
            form_type = item.data["form_type"]
            if form_type[0] == "S":
                form_type = "Schedule " + item.data["form_type"][1]

            if form_type not in sheet_row.source:
                sheet_row.source[form_type] = []
                subsheet = FECItemizationSheet(
                    joinSheetnames(sheet_row.name, form_type),
                    schedule_name=form_type,
                    source=[],
                    size=0,
                )
                subsheet.reload()
                subsheet.set_columns_from_row(item.data)
                sheet_row.addRow(subsheet)
                itemization_subsheets[form_type] = subsheet
            else:
                subsheet = itemization_subsheets[form_type]

            # add the itemization to the subsheet and to the component sheet's counts
            subsheet.addRow(item.data)
            subsheet.source.append(item.data)
            subsheet.size += 1

            sheet_row.source[form_type].append(item.data)
            sheet_row.size += 1