Beispiel #1
0
def urlcache(url, days=1, text=True, headers=None):
    '''Return Path object to a local cache of the contents at `url`.

    Re-downloads when the cached copy is older than `days` days.
    `text`=True decodes the response as UTF-8 (stripped); otherwise raw
    bytes are written.  `headers` is an optional dict of extra HTTP
    request headers.
    '''
    # None instead of a mutable {} default; an empty dict means "no extra headers".
    headers = headers or {}
    p = Path(
        os.path.join(options.visidata_dir, 'cache',
                     urllib.parse.quote(url, safe='')))
    if p.exists():
        # Serve the cached copy while it is fresh enough.
        secs = time.time() - modtime(p)
        if secs < days * 24 * 60 * 60:
            return p

    if not p.parent.exists():
        os.makedirs(p.parent, exist_ok=True)

    req = Request(url)
    for k, v in headers.items():
        req.add_header(k, v)

    with urlopen(req) as fp:
        ret = fp.read()
        if text:
            ret = ret.decode('utf-8').strip()
            with p.open_text(mode='w') as fpout:
                fpout.write(ret)
        else:
            with p.open_bytes(mode='w') as fpout:
                fpout.write(ret)

    return p
Beispiel #2
0
def save_tabulate(sheet, givenpath, confirm_overwrite=True):
    'Write `sheet` to `givenpath` in the configured tabulate format.'
    target = Path(givenpath)

    # Ask before clobbering an existing file, unless the caller opted out.
    if confirm_overwrite and target.exists():
        confirm(f"{givenpath} already exists. overwrite? ")

    _save_table(sheet, options.tabulate_format, target)
Beispiel #3
0
def test_s3_download(tmp_path, s3_setup, path_info):
    """Download every file from the mock bucket and verify that both
    sample files made it to disk intact.
    """
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}")
    sheet.download(sheet.rows, Path(tmp_path))
    vd.sync()
    downloaded = {entry.name for entry in Path(tmp_path).glob("**/*")}
    expected = {path_info.base_filename, path_info.gzip_filename}
    assert expected <= downloaded
Beispiel #4
0
    def moveFile(self, row, val):
        '''Move the file in `row` into directory `val` (absolute, or
        relative to this sheet's source directory), creating the
        destination directory if necessary.'''
        fn = row.name + row.ext
        newpath = os.path.join(val, fn)
        # os.path.isabs is portable; the original checked only for a
        # leading '/', which misses Windows-style absolute paths.
        if not os.path.isabs(newpath):
            newpath = os.path.join(self.source.resolve(), newpath)

        parent = Path(newpath).parent
        if parent.exists():
            if not parent.is_dir():
                error('destination %s not a directory' % parent)
        else:
            # Another process may create the directory concurrently; fine.
            with contextlib.suppress(FileExistsError):
                os.makedirs(parent.resolve())

        os.rename(row.resolve(), newpath)
        row.fqpn = newpath
        self.restat(row)
Beispiel #5
0
def test_s3_roundtrip(tmp_path, s3_setup, path_info):
    """
    Upload a sample file to our mock S3 server, then confirm that
    a VisiData round trip brings back the same data.
    """
    destination = tmp_path / "sample.json"
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}/{path_info.s3_key}")
    vd.save_json(Path(destination), sheet)
    with open(path_info.local_file, "r") as original, open(destination, "r") as saved:
        assert json.load(original) == json.load(saved)
Beispiel #6
0
def test_local_roundtrip(tmp_path, path_info):
    """
    Sanity check: a round trip of the sample JSON file works as
    expected before S3 gets into the mix.
    """
    destination = tmp_path / "sample.json"
    sheet = load_vd_sheet(path_info.local_file)
    vd.save_json(Path(destination), sheet)
    with open(path_info.local_file, "r") as original, open(destination, "r") as saved:
        assert json.load(original) == json.load(saved)
Beispiel #7
0
def test_s3_gzip_roundtrip(tmp_path, s3_setup, path_info):
    """
    Zip and then upload a sample file to our mock S3 server. Confirm
    that a VisiData round trip handles the decompression and outputs
    the same data.
    """
    destination = tmp_path / "sample.json"
    sheet = load_vd_sheet(f"s3://{path_info.s3_bucket}/{path_info.s3_key}.gz")
    vd.save_json(Path(destination), sheet)
    with open(path_info.local_file, "r") as original, open(destination, "r") as saved:
        assert json.load(original) == json.load(saved)
Beispiel #8
0
def urlcache(url, cachesecs=24*60*60):
    'Returns Path object to local cache of url contents.'
    cachefile = Path(os.path.join(options.visidata_dir, 'cache', urllib.parse.quote(url, safe='')))
    if cachefile.exists():
        # Reuse the cached copy while it is younger than cachesecs.
        age = time.time() - cachefile.stat().st_mtime
        if age < cachesecs:
            return cachefile

    if not cachefile.parent.exists():
        os.makedirs(cachefile.parent.resolve(), exist_ok=True)

    assert cachefile.parent.is_dir(), cachefile.parent

    req = urllib.request.Request(url, headers={'User-Agent': __version_info__})
    with urllib.request.urlopen(req) as response:
        contents = response.read().decode('utf-8').strip()
        with cachefile.open_text(mode='w') as out:
            out.write(contents)

    return cachefile
Beispiel #9
0
        def _walkfiles(p):
            'Yield each directory under `p`, followed by the files it contains.'
            root = str(p)
            for dirpath, dirnames, filenames in os.walk(root):
                rel = dirpath[len(root)+1:]
                if rel in ('.', '..'):
                    continue

                dirp = Path(dirpath)
                yield dirp

                for name in filenames:
                    yield dirp/name
Beispiel #10
0
def eval_vd(logpath, *args, **kwargs):
    'Instantiate logpath with args/kwargs replaced and replay all commands.'
    logtext = logpath.read_text()
    if args or kwargs:
        # The saved cmdlog is a format template; fill in replay parameters.
        logtext = logtext.format(*args, **kwargs)

    src = Path(logpath.given, fp=io.StringIO(logtext), filesize=len(logtext))
    sheet = openSource(src, filetype=src.ext)
    sheet.name += '_vd'
    vd.push(sheet)
    sheet.vd = vd
    return sheet
Beispiel #11
0
    def save(self, vs, filetype):
        'Copy rows to the system clipboard.'

        # use NTF to generate filename and delete file on context exit
        with tempfile.NamedTemporaryFile(suffix='.' + filetype) as temp:
            vd.sync(saveSheets(Path(temp.name), vs))
            # Close the re-opened handle once the subprocess finishes; the
            # original leaked it by passing an anonymous open() to Popen.
            with open(temp.name, 'r', encoding=options.encoding) as fpin:
                p = subprocess.Popen(self.get_command('copy'),
                                     stdin=fpin,
                                     stdout=subprocess.DEVNULL,
                                     close_fds=True)
                p.communicate()
Beispiel #12
0
    def reload(self):
        'Scan the source directory tree, collecting non-hidden files.'
        self.toBeDeleted = []
        self.rows = []
        basepath = self.source.resolve()
        for dirpath, dirnames, filenames in os.walk(basepath):
            relpath = dirpath[len(basepath) + 1:]
            if relpath.startswith('.'):
                continue
            for name in filenames:
                if name.startswith('.'):
                    continue
                self.rows.append(Path(os.path.join(dirpath, name)))

        # sort by modtime initially
        self.rows.sort(key=lambda r: r.stat().st_mtime, reverse=True)
Beispiel #13
0
    def reload(self):
        '''Rebuild the command dictionary, reverse keybindings, and the
        rows of commands for this sheet.'''
        from pkg_resources import resource_filename
        cmdlist = TsvSheet('cmdlist',
                           source=Path(
                               resource_filename(__name__, 'commands.tsv')))
        cmdlist.reload_sync()
        self.cmddict = {}
        for cmdrow in cmdlist.rows:
            self.cmddict[(cmdrow.sheet, cmdrow.longname)] = cmdrow

        # Build the reverse-binding map from scratch.  The original filtered
        # with `keystrokes not in self.revbinds` inside the comprehension that
        # assigned self.revbinds, so it consulted the value left over from a
        # previous reload (and would raise AttributeError if never set);
        # check against the dict being built instead.
        # NOTE(review): the dict is keyed by longname but the membership test
        # uses keystrokes, mirroring the original's asymmetry -- confirm intent.
        revbinds = {}
        for (keystrokes, _), longname in bindkeys.iter(self.source):
            if keystrokes not in revbinds:
                revbinds[longname] = keystrokes
        self.revbinds = revbinds

        self.rows = []
        for (k, o), v in commands.iter(self.source):
            self.addRow(v)
            v.sheet = o
Beispiel #14
0
def openurl_http(path, filetype=None):
    'Open an http(s) source, streaming its contents into a VisiData sheet.'
    import requests

    response = requests.get(path.given, stream=True)

    # No explicit filetype: guess from the mime subtype of Content-Type.
    if not filetype:
        mimetype = response.headers['content-type'].split(';')[0]
        subtype = mimetype.split('/')[-1]
        filetype = content_filetypes.get(subtype, subtype)

    # If no charset is provided by response headers, use the user-specified
    # encoding option (which defaults to UTF-8) and hope for the best.  The
    # alternative is an error because iter_lines() will produce bytes.  We're
    # streaming so can't use response.apparent_encoding.
    if not response.encoding:
        response.encoding = options.encoding

    # RepeatFile makes the streamed contents re-iterable for the loader.
    lines = response.iter_lines(decode_unicode=True)
    fp = RepeatFile(iter_lines=lines)

    # call open_<filetype> with a usable Path
    return openSource(Path(path.given, fp=fp), filetype=filetype)
Beispiel #15
0
def main_vd():
    'Open the given sources using the VisiData interface.'
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('inputs', nargs='*', help='initial sources')
    parser.add_argument('-f', dest='filetype', default='', help='uses loader for filetype instead of file extension')
    parser.add_argument('-y', dest='confirm_overwrite', default=None, action='store_false', help='overwrites existing files without confirmation')
    parser.add_argument('-p', '--play', dest='play', default=None, help='replays a saved .vd file within the interface')
    parser.add_argument('-P', dest='preplay', action='append', default=[], help='VisiData command to preplay before cmdlog replay')
    parser.add_argument('-b', '--batch', dest='batch', action='store_true', default=False, help='replays in batch mode (with no interface and all status sent to stdout)')
    parser.add_argument('-o', '--output', dest='output', default=None, help='saves the final visible sheet to output at the end of replay')
    parser.add_argument('-w', dest='replay_wait', default=0, help='time to wait between replayed commands, in seconds')
    parser.add_argument('-d', dest='delimiter', help='delimiter to use for tsv/usv filetype')
    parser.add_argument('--diff', dest='diff', default=None, help='show diffs from all sheets against this source')
    parser.add_argument('-v', '--version', action='version', version=__version_info__)

    args = vd.parseArgs(parser)

    # fetch motd and plugins *after* options parsing/setting
    visidata.PluginsSheet().reload()
    domotd()

    locale.setlocale(locale.LC_ALL, '')

    # Detect piped stdin/stdout so both `cmd | vd` and `vd ... | cmd` work.
    flPipedInput = not sys.stdin.isatty()
    flPipedOutput = not sys.stdout.isatty()

    vd._stdin, vd._stdout = duptty()  # always dup stdin/stdout

    # A pseudo-Path wrapping the duplicated stdin, used wherever '-' appears.
    stdinSource = Path('-', fp=vd._stdin)

    # parse args, including +sheetname:subsheet:4:3 starting at row:col on sheetname:subsheet[:...]
    start_positions = []  # (list_of_sheetstr, str, str)  # empty sheetstr means all sheets
    startsheets, startrow, startcol = [], None, None
    fmtargs = []
    fmtkwargs = {}
    inputs = []
    for arg in args.inputs:
        if arg.startswith('+'):  # position cursor at start
            if ':' in arg:
                pos = arg[1:].split(':')
                if len(pos) == 1:
                    # only a row given; target the most recently given input
                    startsheet = [Path(inputs[-1]).name] if inputs else None
                    start_positions.append((startsheet, pos[0], None))
                elif len(pos) == 2:
                    # NOTE(review): `startsheet` is computed here but the
                    # appended tuple passes None (all sheets) -- confirm intent.
                    startsheet = [Path(inputs[-1]).name] if inputs else None
                    startrow, startcol = pos
                    start_positions.append((None, startrow, startcol))
                elif len(pos) >= 3:
                    # +sheet[:subsheet...]:row:col -- last two are the position
                    startsheets = pos[:-2]
                    startrow, startcol = pos[-2:]
                    start_positions.append((startsheets, startrow, startcol))
            else:
                # bare +row: apply to all sheets
                start_positions.append((None, arg[1:], None))

        elif args.play and '=' in arg:
            # parse 'key=value' pairs for formatting cmdlog template in replay mode
            k, v = arg.split('=')
            fmtkwargs[k] = v
        elif arg == '-':
            inputs.append(stdinSource)
        else:
            inputs.append(arg)
            fmtargs.append(arg)

    if args.diff:
        vs = openSource(args.diff)
        vd.push(vs)
        vs.reload()
        setDiffSheet(vs)

    if args.batch:
        # Batch mode: no curses UI; route status to stderr and run synchronously.
        options.undo = False
        vd.status = lambda *args, **kwargs: print(*args, file=sys.stderr)  # ignore kwargs (like priority)
        vd.editline = lambda *args, **kwargs: ''
        vd.execAsync = lambda func, *args, **kwargs: func(*args, **kwargs) # disable async

    # Run any -P commands on a throwaway sheet before the main replay/run.
    for cmd in args.preplay:
        Sheet('').exec_keystrokes(cmd)

    if not args.play:
        if flPipedInput and not inputs:  # '|vd' without explicit '-'
            inputs.append(stdinSource)

    sources = []
    for src in inputs:
        vs = openSource(src)
        vd.cmdlog.openHook(vs, src)
        sources.append(vs)

    vd.sheets.extend(sources)  # purposefully do not load everything

    if not vd.sheets and not args.play and not args.batch:
        # Nothing to show: fall back to the main menu sheet.
        vd.push(vd.vdmenu)

    if not args.play:
        if args.batch:
            vd.push(sources[0])
            sources[0].reload()

        # Apply each recorded +position to its target sheet(s).
        for startsheets, startrow, startcol in start_positions:
            sheets = []  # sheets to apply startrow:startcol to
            if not startsheets:
                sheets = sources  # apply row/col to all sheets
            else:
                # Walk down the sheet:subsheet chain, loading each level.
                vs = vd.getSheet(startsheets[0]) or sources[-1]
                vd.sync(vs.ensureLoaded())
                vd.clearCaches()
                for startsheet in startsheets[1:]:
                    rowidx = vs.getRowIndexFromStr(startsheet)
                    if rowidx is None:
                        vs = None
                        vd.warning(f'no sheet "{startsheet}"')
                        break
                    vs = vs.rows[rowidx]
                    vd.sync(vs.ensureLoaded())
                    vd.clearCaches()
                if vs:
                    vd.push(vs)
                    sheets = [vs]

            if startrow:
                for vs in sheets:
                    if vs:
                        vs.moveToRow(startrow) or vd.warning(f'{vs} has no row "{startrow}"')

            if startcol:
                for vs in sheets:
                    if vs:
                        vs.moveToCol(startcol) or vd.warning(f'{vs} has no column "{startcol}"')

        if not args.batch:
            run(vd.sheets[0])
    else:
        # Replay mode: load the cmdlog (from stdin or a file) and replay it.
        if args.play == '-':
            vdfile = stdinSource
            vdfile.name = 'stdin.vd'
        else:
            vdfile = Path(args.play)

        vs = eval_vd(vdfile, *fmtargs, **fmtkwargs)
        vd.sync(vs.reload())
        if args.batch:
            if vd.replay_sync(vs):  # error
                return 1
        else:
            vd.replay(vs)
            run()

    # Save/emit the top sheet when output was requested or stdout is a pipe.
    if vd.sheets and (flPipedOutput or args.output):
        outpath = Path(args.output or '-')
        saveSheets(outpath, vd.sheets[0], confirm_overwrite=False)
        vd.sync()

    vd._stdout.flush()

    return 0
Beispiel #16
0
def currentDirSheet(p):
    'Support opening the current DirSheet from the vdmenu'
    cwd = Path('.')
    return DirSheet('.', source=cwd)
Beispiel #17
0
 def iterload(self):
     'Yield a Path for each line (a filename) read from the source.'
     for line in self.source.open_text():
         yield Path(line.rstrip())
Beispiel #18
0
    def reload(self):
        '''Parse the FEC filing in self.source, creating one row (itself a
        child sheet) per component type encountered in the file.'''
        from fecfile import fecparser
        self.rows = []

        row_dict = {}               # component_name -> its child sheet (one per type)
        itemization_subsheets = {}  # form_type -> FECItemizationSheet subsheet

        def addSheetRow(component_name):
            "On first encountering a component, add a row to the filing sheet"

            cls = COMPONENT_SHEET_CLASSES[component_name]

            # Itemization/text components accumulate lists of records;
            # the others hold a single dict of fields.
            source_cls = list if cls in [FECItemizationSheet, TextSheet
                                         ] else dict

            vs = cls(
                joinSheetnames(self.name, component_name),
                component_name=component_name,
                source=source_cls(),
                size=0,
            )

            vs.reload()
            row_dict[component_name] = vs
            self.addRow(vs)

        src = Path(self.source.resolve())

        item_iter = fecparser.iter_lines(src, {"as_strings": True})

        for item in item_iter:
            dtype = item.data_type
            # Lazily create the child sheet for a component type on first sight.
            if dtype not in row_dict.keys():
                addSheetRow(dtype)

            sheet_row = row_dict[dtype]

            if dtype in ["header", "summary"]:
                # Single-record components: the item's dict becomes the source.
                sheet_row.source = item.data
                sheet_row.reload()

            elif dtype == "text":
                # First text record defines the columns.
                if len(sheet_row.source) == 0:
                    sheet_row.set_columns_from_row(item.data)
                sheet_row.source.append(item.data)
                sheet_row.addRow(item.data)
                sheet_row.size += 1

            elif dtype == "F99_text":
                # Free-form filing text: one row per line.
                sheet_row.source = item.data.split("\n")
                sheet_row.size = len(sheet_row.source)

            elif dtype == "itemization":
                form_type = item.data["form_type"]

                # Schedule rows ("S..." form types) are grouped by schedule letter.
                if form_type[0] == "S":
                    form_type = "Schedule " + item.data["form_type"][1]

                # First record of a form type creates its subsheet.
                if form_type not in sheet_row.source:
                    sheet_row.source[form_type] = []
                    subsheet = FECItemizationSheet(
                        joinSheetnames(sheet_row.name, form_type),
                        schedule_name=form_type,
                        source=[],
                        size=0,
                    )
                    subsheet.reload()
                    subsheet.set_columns_from_row(item.data)
                    sheet_row.addRow(subsheet)
                    itemization_subsheets[form_type] = subsheet
                else:
                    subsheet = itemization_subsheets[form_type]

                subsheet.addRow(item.data)
                subsheet.source.append(item.data)
                subsheet.size += 1

                # Mirror the record into the parent itemization sheet's source.
                sheet_row.source[form_type].append(item.data)
                sheet_row.size += 1