Example #1
    def run(self):
        super().run()
        self.build_config()

        # Parse the matcher query
        if self.args.qmacro:
            strquery = ""
            qmacro_query = self.query
        else:
            if self.query is None:
                self.parser.error(
                    "you need to specify a filter expression or use --file")
            strquery = self.query
            qmacro_query = None

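        # Compile the query string into a Matcher, expanding it against the
        # remote datasets listed in the configuration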
        if strquery:
            query = arkimet.Matcher(
                arkimet.dataset.http.expand_remote_query(
                    self.config, strquery))
        else:
            query = arkimet.Matcher()

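        # Configure the output processor, then run the query against the
        # configured inputs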
        with self.outfile() as outfd:
            arki_query = arkimet.cmdline.ArkiQuery()
            arki_query.set_inputs(self.config)
            arki_query.set_processor(
                query=query,
                outfile=outfd,
                yaml=self.args.yaml,
                json=self.args.json,
                annotate=self.args.annotate,
                inline=self.args.inline,
                data=self.args.data,
                summary=self.args.summary,
                summary_short=self.args.summary_short,
                summary_restrict=self.args.summary_restrict,
                archive=self.args.archive,
                postproc=self.args.postproc,
                postproc_data=self.args.postproc_data,
                sort=self.args.sort,
            )

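            # Pick the query source: standard input, merged datasets, a query
            # macro, or each configured input section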
            if self.args.stdin:
                arki_query.query_file(sys.stdin, self.args.stdin)
            elif self.args.merged:
                arki_query.query_merged()
            elif self.args.qmacro:
                arki_query.query_qmacro(self.args.qmacro, qmacro_query)
            else:
                arki_query.query_sections()
Example #2
    def run(self):
        super().run()

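        # --query: print the expanded form of the matcher given on the command line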
        if self.args.query:
            if not self.args.input:
                self.parser.error("--query needs a query on the command line")
            matcher = arkimet.Matcher(self.args.input)
            print(matcher.expanded)
            raise Exit()

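        # --aliases: write out the alias database, either from a remote server
        # or from the local installation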
        if self.args.aliases:
            if self.args.input:
                sections = arkimet.dataset.http.get_alias_database(
                    self.args.input)
            else:
                sections = arkimet.get_alias_database()

            if self.args.output:
                with open(self.args.output, "wt") as fd:
                    sections.write(fd)
            else:
                sections.write(sys.stdout)
            raise Exit()

        if self.args.config:
            self.parser.error(
                "please use arki-mergeconf instead of arki-dump --config")

        if self.args.info:
            import json
            cfg = arkimet.config()
            print(json.dumps(cfg, indent=1))
            raise Exit()

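        # --bbox: compute and print the bounding box of the input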
        if self.args.bbox:
            dump = arkimet.cmdline.ArkiDump()
            with self.input("rb") as fd:
                bbox = dump.bbox(fd)
            with self.output("wt") as fd:
                print(bbox, file=fd)
            raise Exit()

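        # --from-yaml-data: convert YAML metadata from the input back into binary form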
        if self.args.from_yaml_data:
            dump = arkimet.cmdline.ArkiDump()
            with self.input("rb") as fdin:
                with self.output("wb") as fdout:
                    raise Exit(dump.reverse_data(fdin, fdout))

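        # --from-yaml-summary: convert a YAML summary from the input back into binary form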
        if self.args.from_yaml_summary:
            dump = arkimet.cmdline.ArkiDump()
            with self.input("rb") as fdin:
                with self.output("wb") as fdout:
                    raise Exit(dump.reverse_summary(fdin, fdout))

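        # Default action: dump the input as YAML, optionally annotated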
        dump = arkimet.cmdline.ArkiDump()
        with self.input("rb") as fdin:
            with self.output("wb") as fdout:
                raise Exit(dump.dump_yaml(fdin, fdout, self.args.annotate))
Example #3
    def test_query_summary_style_yaml(self):
        """
        Test style=yaml summary queries
        """
        res = requests.post(self.server_url +
                            "/dataset/test200/summary?style=yaml",
                            data={
                                "query": arki.Matcher().expanded,
                            })
        self.assertEqual(res.text[:11], "SummaryItem")
Example #4
    def test_query_global_summary_style_binary(self):
        """
        Test style=binary summary queries
        """
        res = requests.post(self.server_url + "/summary?style=binary",
                            data={
                                "query": arki.Matcher().expanded,
                            })
        res.raise_for_status()
        self.assertEqual(res.content[:2], b"SU")
Example #5
    def test_query_summaryshort_style_yaml(self):
        """
        Test style=yaml summary short queries
        """
        res = requests.post(self.server_url +
                            "/dataset/test200/summaryshort?style=yaml",
                            data={
                                "query": arki.Matcher().expanded,
                            })
        res.raise_for_status()
        self.assertEqual(res.text[:12], "SummaryStats")
Example #6
    def test_query_summaryshort_style_json(self):
        """
        Test style=json summary short queries
        """
        res = requests.post(self.server_url +
                            "/dataset/test200/summaryshort?style=json",
                            data={
                                "query": arki.Matcher().expanded,
                            })
        res.raise_for_status()
        self.assertIn("items", res.json())
Example #7
    def __init__(self, macro_cfg, datasets_cfg, macro_args, query):
        self.macro_cfg = macro_cfg
        self.datasets_cfg = datasets_cfg

        # The argument, if provided, is a date used to expand @ in date expressions
        macro_args = macro_args.strip()
        if macro_args:
            self.date_ref = datetime.datetime.strptime(macro_args,
                                                       "%Y-%m-%d").date()
        else:
            self.date_ref = None

        # Rows to query
        self.rows = []

        # Parse query
        for idx, line in enumerate(query.splitlines(), start=1):
            mo = self.re_line.match(line)
            if not mo:
                raise RuntimeError("query:{}: line not parsed: {}".format(
                    idx, repr(line)))

            dsname = mo.group("ds")
            reftime = datetime.datetime.combine(self._to_date(mo.group("d")),
                                                self._to_time(mo.group("t")))

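            # Build a matcher selecting exactly this row's reference time,
            # timerange, level and product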
            matcher = arki.Matcher(
                "reftime: ={reftime:%Y-%m-%dT%H:%M:%S}Z; "
                "timerange: {trange}; level: {level}; product: {product}".
                format(reftime=reftime,
                       trange=self._to_matcher(mo.group("s")),
                       level=self._to_matcher(mo.group("l")),
                       product=self._to_matcher(mo.group("v"))))

            self.rows.append(Row(idx, line, reftime, dsname, matcher))

        # Sort rows by reftime
        self.rows.sort(key=lambda x: x.reftime)
Example #8
    def test_empty(self):
        matcher = arki.Matcher()
        self.assertEqual(str(matcher), "")
Example #9
    def test_docs(self):
        """
        Run the doc/matcher/*.txt as doctests
        """
        re_given = re.compile(
            r"^Given (\w+ ``[^`]+``(?:\s*,\s*\w+ ``[^`]+``)*)")
        re_given_item = re.compile(r"(\w+) ``([^`]+)``")
        re_matches = re.compile(r"^\* ``(.+)`` matches\b")
        re_not_matches = re.compile(r"^\* ``(.+)`` does not match\b")
        re_doctest_info = re.compile(r".. doctest (\w+): (.+)")

        docdir = os.path.join(os.environ["TOP_SRCDIR"], "doc")
        doctests = [os.path.join(docdir, "matcher.rst")]
        doctests += glob.glob(os.path.join(docdir, "matcher", "*.rst"))
        self.assertTrue(doctests)

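        # Scan each documentation file, building Metadata from `Given` lines
        # and checking every matcher example against it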
        for pathname in doctests:
            md = None
            expected_matches = 0
            expected_not_matches = 0
            count_matches = 0
            count_not_matches = 0
            with open(pathname, "rt", encoding="utf-8") as fd:
                for lineno, line in enumerate(fd, start=1):
                    mo = re_given.match(line)
                    if mo:
                        md = arki.Metadata()
                        for m in re_given_item.finditer(mo.group(1)):
                            try:
                                md[m.group(1)] = m.group(2)
                            except Exception as e:
                                self.fail(
                                    "{}:{}: cannot parse metadata in line {}: {}"
                                    .format(pathname, lineno, line.rstrip(),
                                            e))
                        continue

                    mo = re_matches.match(line)
                    if mo:
                        if md is None:
                            self.fail(
                                "{}:{}: `matches` line found before Given line"
                                .format(pathname, lineno))

                        matcher = arki.Matcher(mo.group(1))
                        self.assertTrue(matcher.match(md))
                        count_matches += 1
                        continue

                    mo = re_not_matches.match(line)
                    if mo:
                        if md is None:
                            self.fail(
                                "{}:{}: `does not match` line found before Given line"
                                .format(pathname, lineno))

                        matcher = arki.Matcher(mo.group(1))
                        self.assertFalse(matcher.match(md))
                        count_not_matches += 1
                        continue

                    mo = re_doctest_info.match(line)
                    if mo:
                        tag = mo.group(1)
                        if tag == "matched":
                            expected_matches = int(mo.group(2))
                        elif tag == "not_matched":
                            expected_not_matches = int(mo.group(2))
                        continue

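                # Check that the number of matcher examples found corresponds
                # to the counts declared in the file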
                if (expected_matches != count_matches
                        or expected_not_matches != count_not_matches):
                    self.fail(
                        "{}: test count expectations failed: matches {}/{}, not matches {}/{}"
                        .format(pathname, count_matches, expected_matches,
                                count_not_matches, expected_not_matches))
Example #10
    def test_parseerror(self):
        with self.assertRaises(ValueError):
            arki.Matcher("invalid")
Example #11
    def test_parse(self):
        matcher = arki.Matcher("reftime:=today")
        self.assertEqual(str(matcher), "reftime:=today")
Example #12
    def run(self):
        super().run()

        # Process --config options
        if self.args.config is not None:
            for pathname in self.args.config:
                cfg = arki.dataset.read_configs(pathname)
                for name, section in cfg.items():
                    self.add_config_section(section, name)

        # Read the config files from the remaining commandline arguments
        for path in self.args.sources:
            if path.startswith("http://") or path.startswith("https://"):
                sections = arki.dataset.http.load_cfg_sections(path)
                for name, section in sections.items():
                    self.add_config_section(section, name)
            else:
                section = arki.dataset.read_config(path)
                self.add_config_section(section)

        if not self.config:
            raise Fail(
                "you need to specify at least one config file or dataset")

        # Remove disallowed entries
        if self.args.restrict:
            self.filter_restrict(self.args.restrict)

        if self.args.ignore_system_datasets:
            to_remove = []

            for name, section in self.config.items():
                type = section.get("type")
                name = section.get("name")

                if (type == "error" or type == "duplicates"
                        or (type == "remote" and
                            (name == "error" or name == "duplicates"))):
                    to_remove.append(name)

            for name in to_remove:
                del self.config[name]

        if not self.config:
            raise Fail("none of the configuration provided were useable")

        # Validate the configuration
        has_errors = False
        for name, section in self.config.items():
            # Validate filters
            filter = section.get("filter")
            if filter is None:
                continue

            try:
                arki.Matcher(filter)
            except ValueError as e:
                print("{}: {}".format(name, e), file=sys.stderr)
                has_errors = True
        if has_errors:
            raise Fail("Some input files did not validate.")

        # If requested, compute extra information
        if self.args.extra:
            for name, section in self.config.items():
                # Instantiate the dataset
                ds = arki.dataset.Reader(section)
                # Get the summary
                summary = ds.query_summary()
                # Compute bounding box, and store the WKT in bounding
                bbox = summary.get_convex_hull()
                if bbox:
                    section["bounding"] = bbox

        # Output the merged configuration
        if self.args.output:
            with open(self.args.output, "wt") as fd:
                self.config.write(fd)
        else:
            self.config.write(sys.stdout)
Example #13
    def run(self):
        super().run()
        self.build_config()

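        # The processor is configured with an empty Matcher, which places no
        # restriction on the scanned data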
        with self.outfile() as outfd:
            arki_scan = arkimet.cmdline.ArkiScan()
            arki_scan.set_inputs(self.config)
            arki_scan.set_processor(
                query=arkimet.Matcher(),
                outfile=outfd,
                yaml=self.args.yaml,
                json=self.args.json,
                annotate=self.args.annotate,
                inline=self.args.inline,
                data=self.args.data,
                summary=self.args.summary,
                summary_short=self.args.summary_short,
                summary_restrict=self.args.summary_restrict,
                archive=self.args.archive,
                postproc=self.args.postproc,
                postproc_data=self.args.postproc_data,
                sort=self.args.sort,
            )

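            # Dispatch (or test-dispatch) the scanned data into the datasets
            # described by the given configuration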
            if self.args.dispatch or self.args.testdispatch:
                kw = dict(
                    copyok=self.args.copyok,
                    copyko=self.args.copyko,
                    validate=self.args.validate,
                    flush_threshold=(self.parse_size(self.args.flush_threshold)
                                     if self.args.flush_threshold is not None
                                     else 0),
                )

                if self.args.dispatch:
                    dispatch_cfg = arkimet.cfg.Sections()
                    for source in self.args.dispatch:
                        self.merge_config(dispatch_cfg,
                                          arkimet.cfg.Sections.parse(source))
                    kw["dispatch"] = dispatch_cfg
                elif self.args.testdispatch:
                    dispatch_cfg = arkimet.cfg.Sections()
                    for source in self.args.testdispatch:
                        self.merge_config(dispatch_cfg,
                                          arkimet.cfg.Sections.parse(source))
                    kw["testdispatch"] = dispatch_cfg

                arki_scan.set_dispatcher(**kw)

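                # Dispatch either standard input or the configured input sections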
                if self.args.stdin:
                    all_successful = arki_scan.dispatch_file(
                        sys.stdin,
                        self.args.stdin,
                        ignore_duplicates=self.args.ignore_duplicates,
                        status=self.args.status)
                else:
                    all_successful = arki_scan.dispatch_sections(
                        moveok=self.args.moveok,
                        moveko=self.args.moveko,
                        movework=self.args.movework,
                        ignore_duplicates=self.args.ignore_duplicates,
                        status=self.args.status)
            else:
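                # Plain scan, without dispatching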
                if self.args.stdin:
                    all_successful = arki_scan.scan_file(
                        sys.stdin, self.args.stdin)
                else:
                    all_successful = arki_scan.scan_sections()

            if not all_successful:
                raise Exit(posix.EX_DATAERR)