def run(self):
    super().run()
    self.build_config()

    # Parse the matcher query
    if self.args.qmacro:
        strquery = ""
        qmacro_query = self.query
    else:
        if self.query is None:
            self.parser.error(
                "you need to specify a filter expression or use --file")
        strquery = self.query
        qmacro_query = None

    if strquery:
        query = arkimet.Matcher(
            arkimet.dataset.http.expand_remote_query(
                self.config, strquery))
    else:
        query = arkimet.Matcher()

    with self.outfile() as outfd:
        arki_query = arkimet.cmdline.ArkiQuery()
        arki_query.set_inputs(self.config)
        arki_query.set_processor(
            query=query,
            outfile=outfd,
            yaml=self.args.yaml,
            json=self.args.json,
            annotate=self.args.annotate,
            inline=self.args.inline,
            data=self.args.data,
            summary=self.args.summary,
            summary_short=self.args.summary_short,
            summary_restrict=self.args.summary_restrict,
            archive=self.args.archive,
            postproc=self.args.postproc,
            postproc_data=self.args.postproc_data,
            sort=self.args.sort,
        )

        if self.args.stdin:
            arki_query.query_file(sys.stdin, self.args.stdin)
        elif self.args.merged:
            arki_query.query_merged()
        elif self.args.qmacro:
            arki_query.query_qmacro(self.args.qmacro, qmacro_query)
        else:
            arki_query.query_sections()
def run(self):
    super().run()

    if self.args.query:
        if not self.args.input:
            self.parser.error("--query needs a query on the command line")
        matcher = arkimet.Matcher(self.args.input)
        print(matcher.expanded)
        raise Exit()

    if self.args.aliases:
        if self.args.input:
            sections = arkimet.dataset.http.get_alias_database(
                self.args.input)
        else:
            sections = arkimet.get_alias_database()
        if self.args.output:
            with open(self.args.output, "wt") as fd:
                sections.write(fd)
        else:
            sections.write(sys.stdout)
        raise Exit()

    if self.args.config:
        self.parser.error(
            "please use arki-mergeconf instead of arki-dump --config")

    if self.args.info:
        import json
        cfg = arkimet.config()
        print(json.dumps(cfg, indent=1))
        raise Exit()

    if self.args.bbox:
        dump = arkimet.cmdline.ArkiDump()
        with self.input("rb") as fd:
            bbox = dump.bbox(fd)
        with self.output("wt") as fd:
            print(bbox, file=fd)
        raise Exit()

    if self.args.from_yaml_data:
        dump = arkimet.cmdline.ArkiDump()
        with self.input("rb") as fdin:
            with self.output("wb") as fdout:
                raise Exit(dump.reverse_data(fdin, fdout))

    if self.args.from_yaml_summary:
        dump = arkimet.cmdline.ArkiDump()
        with self.input("rb") as fdin:
            with self.output("wb") as fdout:
                raise Exit(dump.reverse_summary(fdin, fdout))

    dump = arkimet.cmdline.ArkiDump()
    with self.input("rb") as fdin:
        with self.output("wb") as fdout:
            raise Exit(dump.dump_yaml(fdin, fdout, self.args.annotate))
def test_query_summary_style_yaml(self):
    """
    Test style=yaml summary queries
    """
    res = requests.post(
        self.server_url + "/dataset/test200/summary?style=yaml",
        data={
            "query": arki.Matcher().expanded,
        })
    res.raise_for_status()
    self.assertEqual(res.text[:11], "SummaryItem")
def test_query_global_summary_style_binary(self):
    """
    Test style=binary summary queries
    """
    res = requests.post(
        self.server_url + "/summary?style=binary",
        data={
            "query": arki.Matcher().expanded,
        })
    res.raise_for_status()
    self.assertEqual(res.content[:2], b"SU")
def test_query_summaryshort_style_yaml(self):
    """
    Test style=yaml summary short queries
    """
    res = requests.post(
        self.server_url + "/dataset/test200/summaryshort?style=yaml",
        data={
            "query": arki.Matcher().expanded,
        })
    res.raise_for_status()
    self.assertEqual(res.text[:12], "SummaryStats")
def test_query_summaryshort_style_json(self):
    """
    Test style=json summary short queries
    """
    res = requests.post(
        self.server_url + "/dataset/test200/summaryshort?style=json",
        data={
            "query": arki.Matcher().expanded,
        })
    res.raise_for_status()
    self.assertIn("items", res.json())
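# Taken together, the four tests above pin down the response formats of the
# summary endpoints: style=yaml summaries start with "SummaryItem",
# style=yaml summary-short responses with "SummaryStats", style=binary
# summaries with the b"SU" magic, and style=json summary-short bodies carry
# an "items" key. A standalone sketch against a running arki-server; the
# server URL is a placeholder, not part of the test suite.
import requests
import arkimet as arki

server_url = "http://localhost:8080"  # assumption: a local arki-server
query = {"query": arki.Matcher().expanded}

res = requests.post(server_url + "/summary?style=binary", data=query)
res.raise_for_status()
assert res.content[:2] == b"SU"

res = requests.post(
    server_url + "/dataset/test200/summaryshort?style=json", data=query)
res.raise_for_status()
print(res.json()["items"])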
def __init__(self, macro_cfg, datasets_cfg, macro_args, query):
    self.macro_cfg = macro_cfg
    self.datasets_cfg = datasets_cfg

    # The argument, if provided, is a date used to expand @ in date expressions
    macro_args = macro_args.strip()
    if macro_args:
        self.date_ref = datetime.datetime.strptime(
            macro_args, "%Y-%m-%d").date()
    else:
        self.date_ref = None

    # Rows to query
    self.rows = []

    # Parse query
    for idx, line in enumerate(query.splitlines(), start=1):
        mo = self.re_line.match(line)
        if not mo:
            raise RuntimeError("query:{}: line not parsed: {}".format(
                idx, repr(line)))
        dsname = mo.group("ds")
        reftime = datetime.datetime.combine(
            self._to_date(mo.group("d")), self._to_time(mo.group("t")))
        matcher = arki.Matcher(
            "reftime: ={reftime:%Y-%m-%dT%H:%M:%S}Z; "
            "timerange: {trange}; level: {level}; product: {product}".format(
                reftime=reftime,
                trange=self._to_matcher(mo.group("s")),
                level=self._to_matcher(mo.group("l")),
                product=self._to_matcher(mo.group("v"))))
        self.rows.append(Row(idx, line, reftime, dsname, matcher))

    # Sort rows by reftime
    self.rows.sort(key=lambda x: x.reftime)
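# A hypothetical sketch, not part of the original source: one way a helper
# like `_to_date` above could expand "@" date expressions against the
# reference date taken from macro_args. The "@"/"@-N" grammar here is an
# assumption for illustration only.
import datetime


def to_date(s, date_ref):
    s = s.strip()
    if s == "@":
        # bare "@": the reference date itself
        return date_ref
    if s.startswith("@-"):
        # "@-N": N days before the reference date
        return date_ref - datetime.timedelta(days=int(s[2:]))
    # anything else: a literal ISO date
    return datetime.date.fromisoformat(s)


# e.g. to_date("@-1", datetime.date(2024, 5, 20)) == datetime.date(2024, 5, 19)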
def test_empty(self):
    matcher = arki.Matcher()
    self.assertEqual(str(matcher), "")
def test_docs(self):
    """
    Run the doc/matcher/*.rst files as doctests
    """
    re_given = re.compile(
        r"^Given (\w+ ``[^`]+``(?:\s*,\s*\w+ ``[^`]+``)*)")
    re_given_item = re.compile(r"(\w+) ``([^`]+)``")
    re_matches = re.compile(r"^\* ``(.+)`` matches\b")
    re_not_matches = re.compile(r"^\* ``(.+)`` does not match\b")
    re_doctest_info = re.compile(r".. doctest (\w+): (.+)")

    docdir = os.path.join(os.environ["TOP_SRCDIR"], "doc")
    doctests = [os.path.join(docdir, "matcher.rst")]
    doctests += glob.glob(os.path.join(docdir, "matcher", "*.rst"))
    self.assertTrue(doctests)

    for pathname in doctests:
        md = None
        expected_matches = 0
        expected_not_matches = 0
        count_matches = 0
        count_not_matches = 0
        with open(pathname, "rt", encoding="utf-8") as fd:
            for lineno, line in enumerate(fd, start=1):
                mo = re_given.match(line)
                if mo:
                    md = arki.Metadata()
                    for m in re_given_item.finditer(mo.group(1)):
                        try:
                            md[m.group(1)] = m.group(2)
                        except Exception as e:
                            self.fail(
                                "{}:{}: cannot parse metadata in line {}: {}"
                                .format(pathname, lineno, line.rstrip(), e))
                    continue

                mo = re_matches.match(line)
                if mo:
                    if md is None:
                        self.fail(
                            "{}:{}: `matches` line found before Given line"
                            .format(pathname, lineno))
                    matcher = arki.Matcher(mo.group(1))
                    self.assertTrue(matcher.match(md))
                    count_matches += 1
                    continue

                mo = re_not_matches.match(line)
                if mo:
                    if md is None:
                        self.fail(
                            "{}:{}: `does not match` line found before Given line"
                            .format(pathname, lineno))
                    matcher = arki.Matcher(mo.group(1))
                    self.assertFalse(matcher.match(md))
                    count_not_matches += 1
                    continue

                mo = re_doctest_info.match(line)
                if mo:
                    tag = mo.group(1)
                    if tag == "matched":
                        expected_matches = int(mo.group(2))
                    elif tag == "not_matched":
                        expected_not_matches = int(mo.group(2))
                    continue

        if (expected_matches != count_matches
                or expected_not_matches != count_not_matches):
            self.fail(
                "{}: test count expectations failed: matches {}/{}, not matches {}/{}"
                .format(pathname, count_matches, expected_matches,
                        count_not_matches, expected_not_matches))
def test_parseerror(self):
    with self.assertRaises(ValueError):
        arki.Matcher("invalid")
def test_parse(self):
    matcher = arki.Matcher("reftime:=today")
    self.assertEqual(str(matcher), "reftime:=today")
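# A short recap of the Matcher behaviours the tests above rely on: the
# empty matcher stringifies to "", a valid expression survives the round
# trip through str(), an invalid one raises ValueError, and .expanded is
# the alias-expanded form sent to the server by the HTTP tests and printed
# by arki-dump --query above.
import arkimet as arki

assert str(arki.Matcher()) == ""

matcher = arki.Matcher("reftime:=today")
assert str(matcher) == "reftime:=today"
print(matcher.expanded)

try:
    arki.Matcher("invalid")
except ValueError as e:
    print("rejected:", e)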
def run(self):
    super().run()

    # Process --config options
    if self.args.config is not None:
        for pathname in self.args.config:
            cfg = arki.dataset.read_configs(pathname)
            for name, section in cfg.items():
                self.add_config_section(section, name)

    # Read the config files from the remaining command line arguments
    for path in self.args.sources:
        if path.startswith("http://") or path.startswith("https://"):
            sections = arki.dataset.http.load_cfg_sections(path)
            for name, section in sections.items():
                self.add_config_section(section, name)
        else:
            section = arki.dataset.read_config(path)
            self.add_config_section(section)

    if not self.config:
        raise Fail("you need to specify at least one config file or dataset")

    # Remove disallowed entries
    if self.args.restrict:
        self.filter_restrict(self.args.restrict)

    if self.args.ignore_system_datasets:
        to_remove = []
        for name, section in self.config.items():
            ds_type = section.get("type")
            ds_name = section.get("name")
            if (ds_type == "error" or ds_type == "duplicates"
                    or (ds_type == "remote"
                        and (ds_name == "error" or ds_name == "duplicates"))):
                to_remove.append(name)
        for name in to_remove:
            del self.config[name]

    if not self.config:
        raise Fail("none of the configurations provided were usable")

    # Validate the configuration
    has_errors = False
    for name, section in self.config.items():
        # Validate filters
        filter = section.get("filter")
        if filter is None:
            continue
        try:
            arki.Matcher(filter)
        except ValueError as e:
            print("{}: {}".format(name, e), file=sys.stderr)
            has_errors = True
    if has_errors:
        raise Fail("some input files did not validate")

    # If requested, compute extra information
    if self.args.extra:
        for name, section in self.config.items():
            # Instantiate the dataset
            ds = arki.dataset.Reader(section)
            # Get the summary
            summary = ds.query_summary()
            # Compute the bounding box, and store its WKT in `bounding`
            bbox = summary.get_convex_hull()
            if bbox:
                section["bounding"] = bbox

    # Output the merged configuration
    if self.args.output:
        with open(self.args.output, "wt") as fd:
            self.config.write(fd)
    else:
        self.config.write(sys.stdout)
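# A minimal sketch of the kind of configuration the validation pass above
# operates on. The section values (type, format, path, filter) are
# illustrative assumptions; cfg.Sections.parse is also used by the dispatch
# setup of arki-scan below, and passing it a file-like object here is an
# assumption.
import io
import arkimet as arki

cfg_text = """
[test200]
type = iseg
format = grib
path = /tmp/test200
filter = origin:GRIB1,200
"""

sections = arki.cfg.Sections.parse(io.StringIO(cfg_text))
for name, section in sections.items():
    arki.Matcher(section["filter"])  # raises ValueError on a bad filter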
def run(self):
    super().run()
    self.build_config()

    with self.outfile() as outfd:
        arki_scan = arkimet.cmdline.ArkiScan()
        arki_scan.set_inputs(self.config)
        arki_scan.set_processor(
            query=arkimet.Matcher(),
            outfile=outfd,
            yaml=self.args.yaml,
            json=self.args.json,
            annotate=self.args.annotate,
            inline=self.args.inline,
            data=self.args.data,
            summary=self.args.summary,
            summary_short=self.args.summary_short,
            summary_restrict=self.args.summary_restrict,
            archive=self.args.archive,
            postproc=self.args.postproc,
            postproc_data=self.args.postproc_data,
            sort=self.args.sort,
        )

        if self.args.dispatch or self.args.testdispatch:
            kw = dict(
                copyok=self.args.copyok,
                copyko=self.args.copyko,
                validate=self.args.validate,
                flush_threshold=(
                    self.parse_size(self.args.flush_threshold)
                    if self.args.flush_threshold is not None else 0),
            )

            if self.args.dispatch:
                dispatch_cfg = arkimet.cfg.Sections()
                for source in self.args.dispatch:
                    self.merge_config(
                        dispatch_cfg, arkimet.cfg.Sections.parse(source))
                kw["dispatch"] = dispatch_cfg
            elif self.args.testdispatch:
                dispatch_cfg = arkimet.cfg.Sections()
                for source in self.args.testdispatch:
                    self.merge_config(
                        dispatch_cfg, arkimet.cfg.Sections.parse(source))
                kw["testdispatch"] = dispatch_cfg

            arki_scan.set_dispatcher(**kw)

            if self.args.stdin:
                all_successful = arki_scan.dispatch_file(
                    sys.stdin, self.args.stdin,
                    ignore_duplicates=self.args.ignore_duplicates,
                    status=self.args.status)
            else:
                all_successful = arki_scan.dispatch_sections(
                    moveok=self.args.moveok,
                    moveko=self.args.moveko,
                    movework=self.args.movework,
                    ignore_duplicates=self.args.ignore_duplicates,
                    status=self.args.status)
        else:
            if self.args.stdin:
                all_successful = arki_scan.scan_file(
                    sys.stdin, self.args.stdin)
            else:
                all_successful = arki_scan.scan_sections()

    if not all_successful:
        raise Exit(posix.EX_DATAERR)