コード例 #1
0
    def __init__(self,
                 grmr: str,
                 tmpl: str,
                 limit: int,
                 parser: AbstractFileParserClient,
                 evt_handler: AbstractStatEventHandler = None):
        """
        Initialize the grammar tester.

        :param grmr:        Grammar root directory path.
        :param tmpl:        Template directory path.
        :param limit:       Linkage limit for the parser.
        :param parser:      Parser client instance (mandatory).
        :param evt_handler: Optional statistics event handler.
        :raises GrammarTestError: If 'parser' is missing or either argument
                                  has an unexpected type.
        """
        # Validate mandatory parser argument first: None gets its own,
        # more specific error message before the type check.
        if parser is None:
            raise GrammarTestError("GrammarTestError: 'parser' argument can not be None.")

        if not isinstance(parser, AbstractFileParserClient):
            raise GrammarTestError("GrammarTestError: 'parser' is not an instance of AbstractFileParserClient")

        # The event handler is optional, but when given it must be of the proper type.
        if evt_handler is not None and not isinstance(evt_handler, AbstractStatEventHandler):
            raise GrammarTestError("ArgumentError: 'evt_handler' is not an instance of AbstractStatEventHandler")

        # Collaborators
        self._parser = parser                   # parser client used for each test run
        self._event_handler = evt_handler       # optional statistics sink

        # Configuration
        self._grammar_root = grmr               # grammar root directory
        self._template_dir = tmpl               # LG dictionary template directory
        self._linkage_limit = limit             # parser linkage limit
        self._options = 0  # options            # bit-field, see optconst.py

        # Per-run state
        self._is_dir_corpus = False             # True when corpus path is a directory
        self._is_dir_dict = False               # True when dictionary path is a directory
        self._total_metrics = ParseMetrics()    # accumulated parseability metrics
        self._total_quality = ParseQuality()    # accumulated parse quality
        self._total_files = 0                   # corpus files processed
        self._total_dicts = 0                   # dictionary files processed
コード例 #2
0
def eval_parses(test_parses: list,
                ref_parses: list,
                verbose: bool,
                ofile=sys.stdout) -> ParseQuality:
    """
    Compare test parses against reference parses link by link, counting errors.

    :param test_parses: List of test parses in the format prepared by get_parses.
    :param ref_parses: List of reference (gold standard) parses.
    :param verbose: If True, print per-sentence intermediate results to stdout.
    :param ofile: Output file handle for the corpus summary.
    :return: ParseQuality class instance filled with the result data.
    :raises EvalError: If the parse lists differ in length or sentence order.
    """
    # Number of sentences in the gold standard; used to average the totals.
    total_sentences = len(ref_parses)

    total_parse_quality = ParseQuality()

    if len(ref_parses) != len(test_parses):
        raise EvalError(
            "Error: files don't contain same parses. "
            "Number of sentences mismatch. Ref={}, Test={}".format(
                len(ref_parses), len(test_parses)))

    for ref_parse, test_parse in zip(ref_parses, test_parses):

        # Both files must list the sentences in the same order.
        if ref_parse[PARSE_SENTENCE] != test_parse[PARSE_SENTENCE]:
            raise EvalError(
                "Error: Something went wrong. Sentences mismatch." +
                ref_parse[PARSE_SENTENCE] + "\n" + test_parse[PARSE_SENTENCE])

        pq = parse_quality(test_parse[PARSE_LINK_SET],
                           ref_parse[PARSE_LINK_SET])

        pq.ignored += test_parse[PARSE_IGNORED]

        if verbose:
            # Intermediate results always go to stdout, not to 'ofile'.
            print(ParseQuality.text(pq))

        total_parse_quality += pq

    # Average the accumulated per-sentence quality over the corpus.
    if total_sentences > 1:
        total_parse_quality /= float(total_sentences)

    print(ParseQuality.text(total_parse_quality), file=ofile)

    return total_parse_quality
コード例 #3
0
    def _on_dict_file(self, dict_file_path: str, args: list) -> None:
        """
        Callback method which is called for each dictionary file.

        Creates an LG grammar directory for the .dict file, parses the corpus
        (file or directory) with it, and optionally writes a statistics summary
        and fires the on_statistics() event.

        :param dict_file_path: Path to a .dict file.
        :param args: Argument list; indexed by DICT_ARG_* constants
                     (corpus path, output path, dictionary root path).
        :return: None
        """
        # Reset per-dictionary accumulators before processing this .dict file.
        self._total_metrics, self._total_quality = ParseMetrics(
        ), ParseQuality()
        self._total_files = 0

        try:
            dict_path = os.path.split(dict_file_path)[0]
            corp_path = args[DICT_ARG_CORP]
            dest_path = args[DICT_ARG_OUTP]

            # Mirror the dictionary subtree structure under the destination
            # directory by appending the path relative to the dictionary root.
            dest_path += str(dict_path[len(args[DICT_ARG_DICT]):])

            # If BIT_LOC_LANG is set the language subdirectory is created in destination directory
            grmr_path = dest_path if self._options & BIT_LOC_LANG else self._grammar_root

            # Create new LG dictionary using .dict file and template directory with the rest of mandatory files.
            lang_path = create_grammar_dir(dict_file_path, grmr_path,
                                           self._template_dir, self._options)

            # Parse a single corpus file, or recursively walk a corpus directory.
            if os.path.isfile(corp_path):
                self._on_corpus_file(corp_path, [dest_path, lang_path] + args)

            elif os.path.isdir(corp_path):
                traverse_dir_tree(
                    corp_path, "",
                    [self._on_corpus_file, dest_path, lang_path] + args,
                    [self._on_corp_dir, dest_path, lang_path] + args, True)

            # If output format is set to ULL
            if not self._options & BIT_OUTPUT:
                # stat_suffix = "2" if (self._options & BIT_LG_EXE) == BIT_LG_EXE else ""
                stat_path = dest_path + "/" + os.path.split(
                    corp_path)[1] + ".stat"  #+ stat_suffix

                # Write statistics summary to a file
                self._save_stat(stat_path, self._total_metrics,
                                self._total_quality)

                # Invoke on_statistics() event handler
                if self._is_dir_dict and self._event_handler is not None:

                    # Path components are passed leaf-first (reversed split).
                    self._event_handler.on_statistics(
                        (dict_path.split("/"))[::-1], self._total_metrics,
                        self._total_quality)

        # NOTE(review): broad catch keeps directory traversal going after a
        # failed dictionary; the error is only printed, not re-raised.
        except Exception as err:
            print("_on_dict_file(): " + str(type(err)) + ": " + str(err))

        self._total_dicts += 1
コード例 #4
0
    def test_on_statistics(self):
        """Feed known metric/quality fixtures to the dashboard handler."""
        metrics = ParseMetrics()
        quality = ParseQuality()

        # Ten sentences with 60% parseability and 40% quality.
        metrics.sentences = 10
        quality.sentences = 10

        metrics.average_parsed_ratio = Decimal("0.6")
        quality.quality = Decimal("0.4")

        node_path = [
            "connectors-DRK-connectors", "LG_ANY_all_parses",
            "POC-English-NoAmb-LEFT-WALL+period"
        ]

        self.dboard.on_statistics(node_path, metrics, quality)
コード例 #5
0
    def on_statistics(self, nodes: list, metrics: ParseMetrics,
                      quality: ParseQuality):
        """
        Statistics event handler: write metric/quality values into dashboard cells.

        :param nodes:   List of path components substituted into the configured
                        row/column key format strings.
        :param metrics: ParseMetrics instance for the completed test.
        :param quality: ParseQuality instance for the completed test.
        :return:        None
        """

        # Return if dashboard is not configured.
        if self._config is None:
            return

        # row_key, col_key = None, None
        row_ind, col_ind = None, None

        try:
            # Get row and column keys
            row_key = self._config[CONF_ROW_KEY].format(*nodes)
            col_key = self._config[CONF_COL_KEY].format(*nodes)

            # Get row and column indexes
            # NOTE(review): row_ind/col_ind are iterated below, so the config
            # presumably maps each key to a collection of indexes — confirm
            # against the dashboard configuration schema.
            row_ind = self._config[CONF_ROW_IND][row_key]
            col_ind = self._config[CONF_COL_IND][col_key]

        except IndexError as err:
            print("on_statatistics(): IndexError: " + str(err))
            return

        except KeyError as err:
            print("on_statatistics(): KeyError: " + str(err))
            return

        # Fill every (row, col) combination the key pair resolves to.
        for row in row_ind:
            for col in col_ind:

                val_str = None

                try:
                    # Get value key string by column index
                    val_str = self._config[CONF_VAL_KEYS][col].format(
                        nodes=nodes,
                        parseability=metrics.parseability_str(metrics),
                        parsequality=quality.parse_quality_str(quality),
                        PQA=PQA_str(metrics, quality))

                except IndexError as err:
                    print("on_statatistics():2: IndexError: " + str(err))
                    continue

                except KeyError as err:
                    print("on_statatistics():2: KeyError: " + str(err))
                    continue

                # Put value into the table
                self.set_cell_by_indexes(row, col, val_str)
コード例 #6
0
def test_grammar_cfg(conf_path: str) -> (Decimal, Decimal, Decimal):
    """
    Test grammar using configuration(s) from a JSON file.

    :param conf_path:   Path to a configuration file.
    :return:            Tuple (parseability, parse quality, PQA) of the last
                        processed test, as Decimal values.
    """
    pm, pq = ParseMetrics(), ParseQuality()

    try:
        cfgman = JsonFileConfigManager(conf_path)
        # dboard = HTMLFileDashboard(cfgman)

        # Dashboard is optional: create it only when configured.
        dboard = TextFileDashboard(cfgman) if len(
            cfgman.get_config("", "dash-board")) else None

        parser = LGInprocParser()

        # Get configuration parameters
        config = cfgman.get_config("", "grammar-tester")

        # Create GrammarTester instance
        tester = GrammarTester(handle_path_string(config[0][CONF_GRMR_PATH]),
                               handle_path_string(config[0][CONF_TMPL_PATH]),
                               config[0][CONF_LNK_LIMIT], parser, dboard)

        # Config file may have multiple configurations for one component
        for cfg in config:

            # Run grammar test
            pm, pq = tester.test(handle_path_string(cfg[CONF_DICT_PATH]),
                                 handle_path_string(cfg[CONF_CORP_PATH]),
                                 handle_path_string(cfg[CONF_DEST_PATH]),
                                 handle_path_string(cfg[CONF_REFR_PATH]),
                                 get_options(cfg))

        # Save dashboard data to whatever source the dashboard is bound to.
        # Guard against no dashboard being configured: the former unconditional
        # call raised AttributeError on None, masking a successful run.
        if dboard is not None:
            dboard.update_dashboard()

        # print(pm.text(pm))

    except Exception as err:
        print(str(err))

    # NOTE(review): 'finally: return' deliberately swallows any exception after
    # printing it, so the function always yields a result tuple.
    finally:
        # Pass the quality instance (not metrics) to parse_quality(), matching
        # the pm.parseability(pm) calling pattern used throughout the module.
        return pm.parseability(pm), pq.parse_quality(pq), PQA(pm, pq)
コード例 #7
0
    def _save_stat(stat_path: str, metrics: ParseMetrics,
                   quality: ParseQuality) -> None:
        """
        Save statistic estimation results into a file.

        NOTE(review): declared without 'self' — presumably decorated with
        @staticmethod just above this block; verify in the full class body.

        :param stat_path:   Path to the output file, or None for stdout.
        :param metrics:     ParseMetrics class instance.
        :param quality:     ParseQuality class instance.
        :return:            None
        """
        stat_file_handle = None

        try:
            stat_file_handle = sys.stdout if stat_path is None else open(
                stat_path, "w", encoding="utf-8")

            print(ParseMetrics.text(metrics), file=stat_file_handle)
            print(ParseQuality.text(quality), file=stat_file_handle)

            # PQA = average parseability * average quality, as a percentage.
            # Guard both sentence counters: either being zero would raise a
            # (Decimal) division error for an otherwise valid empty corpus.
            print("PQA:\t{0:2.2f}%".format((
                metrics.average_parsed_ratio / metrics.sentences *
                quality.quality / quality.sentences *
                Decimal('100.0')) if metrics.sentences and quality.sentences
                  else 0.0),
                  file=stat_file_handle)

        # Most specific exception first. In Python 3, IOError is an alias of
        # OSError and FileNotFoundError is its subclass, so the original order
        # (IOError first) made the following two clauses unreachable.
        except FileNotFoundError as err:
            print("FileNotFoundError: " + str(err))

        except OSError as err:
            print("OSError: " + str(err))

        except Exception as err:
            print("Exception: " + str(err))

        finally:
            # Never close the shared stdout stream.
            if stat_file_handle is not None and stat_file_handle != sys.stdout:
                stat_file_handle.close()
コード例 #8
0
def parse_quality(test_set: set, ref_set: set) -> ParseQuality:
    """
    Estimate the quality of a single parse against a reference link set.

    :param test_set: Set of links being tested.
    :param ref_set: Reference set of links.
    :return: ParseQuality instance filled with calculated values.
    """
    quality = ParseQuality()

    reference_size = len(ref_set)

    # With an empty reference set there is nothing to compare against;
    # the instance keeps its default values.
    if reference_size:
        quality.total = reference_size
        quality.missing = len(ref_set.difference(test_set))
        quality.extra = len(test_set.difference(ref_set))

        matched = len(test_set.intersection(ref_set))
        quality.quality = Decimal(matched) / Decimal(reference_size)

    return quality
コード例 #9
0
    def parse(self, dict_path: str, corpus_path: str, output_path: str, ref_path: str, options: int) \
            -> (ParseMetrics, ParseQuality):
        """
        Link Grammar API parser invocation routine.

        Parses every sentence of the corpus file with the given dictionary,
        writes the parses in the requested output format, and accumulates
        parseability and (optionally) parse quality statistics.

        :param dict_path:       Dictionary file or directory path.
        :param corpus_path:     Corpus file or directory path.
        :param output_path:     Output file or directory path (None -> stdout).
        :param ref_path:        Reference file or directory path.
        :param options:         Bit field. See `optconst.py` for details
        :return:                Tuple (ParseMetrics, ParseQuality)
        """
        input_file_handle = None
        output_file_handle = None

        # Reference parses, filled only when BIT_PARSE_QUALITY is requested.
        ref_parses = []

        # Sentence statistics variables
        total_metrics, total_quality = ParseMetrics(), ParseQuality()

        sentence_count = 0                  # number of sentences in the corpus

        print("Info: Parsing a corpus file: '" + corpus_path + "'")
        print("Info: Using dictionary: '" + dict_path + "'")

        if output_path is not None:
            print("Info: Parses are saved in: '" + output_path+get_output_suffix(options) + "'")
        else:
            print("Info: Output file name is not specified. Parses are redirected to 'stdout'.")

        try:
            # Load reference parses up front; failure here is non-fatal and
            # simply disables quality estimation for this run.
            if options & BIT_PARSE_QUALITY and ref_path is not None:
                try:
                    data = load_ull_file(ref_path)
                    ref_parses = get_parses(data, (options & BIT_NO_LWALL) == BIT_NO_LWALL, False)

                except Exception as err:
                    print("Exception: " + str(err))

            # ULL link lines start with a digit; used to filter them out below.
            link_line = re.compile(r"\A[0-9].+")

            po = ParseOptions(min_null_count=0, max_null_count=999)
            po.linkage_limit = self._linkage_limit

            di = Dictionary(dict_path)

            input_file_handle = open(corpus_path)
            output_file_handle = sys.stdout if output_path is None \
                                            else open(output_path+get_output_suffix(options), "w")

            for line in input_file_handle:

                # Filter out links when ULL parses are used as input
                if options & BIT_ULL_IN > 0 and link_line.match(line):
                    continue

                # Skip empty lines to get proper statistics estimation and skip commented lines
                if len(line.strip()) < 1:  # or line.startswith("#"):
                    continue

                # Tokenize and parse the sentence
                sent = Sentence(line, di, po)
                linkages = sent.parse()

                # Per-sentence accumulators, merged into the totals below.
                sent_metrics, sent_quality = ParseMetrics(), ParseQuality()
                linkage_count = 0

                for linkage in linkages:

                    # Only the first linkage is counted.
                    if linkage_count == 1:
                        break

                    # Output format selection by mutually exclusive bit flags.
                    if (options & BIT_OUTPUT_DIAGRAM) == BIT_OUTPUT_DIAGRAM:
                        print(linkage.diagram(), file=output_file_handle)

                    elif (options & BIT_OUTPUT_POSTSCRIPT) == BIT_OUTPUT_POSTSCRIPT:
                        print(linkage.postscript(), file=output_file_handle)

                    elif (options & BIT_OUTPUT_CONST_TREE) == BIT_OUTPUT_CONST_TREE:
                        print(linkage.constituent_tree(), file=output_file_handle)

                    # No output-format bit set -> default ULL output with statistics.
                    elif not (options & BIT_OUTPUT):

                        tokens, links = parse_postscript(linkage.postscript().replace("\n", ""), options,
                                                         output_file_handle)

                        # Print ULL formated parses
                        print_output(tokens, links, options, output_file_handle)

                        # Calculate parseability
                        sent_metrics += parse_metrics(prepare_tokens(tokens, options))

                        # Calculate parse quality if the option is set
                        if options & BIT_PARSE_QUALITY and len(ref_parses):
                            sent_quality += parse_quality(get_link_set(tokens, links, options),
                                                          ref_parses[sentence_count][1])

                    linkage_count += 1

                # Sanity checks: ratios are fractions and must never exceed 1.
                assert sent_metrics.average_parsed_ratio <= 1.0, "sent_metrics.average_parsed_ratio > 1.0"
                assert sent_quality.quality <= 1.0, "sent_quality.quality > 1.0"

                total_metrics += sent_metrics
                total_quality += sent_quality

                # if not linkage_count:
                #     sent_metrics.completely_unparsed_ratio += 1

                sentence_count += 1

            total_metrics.sentences = sentence_count
            total_quality.sentences = sentence_count

            # Prevent interleaving "Dictionary close" messages
            ParseOptions(verbosity=0)

        except LG_DictionaryError as err:
            print("LG_DictionaryError: " + str(err))

        except LG_Error as err:
            print("LG_Error: " + str(err))

        except IOError as err:
            print("IOError: " + str(err))

        except FileNotFoundError as err:
            print("FileNotFoundError: " + str(err))

        finally:
            if input_file_handle is not None:
                input_file_handle.close()

            # NOTE(review): returning from 'finally' swallows any in-flight
            # exception after it has been printed above.
            if output_file_handle is not None and output_file_handle != sys.stdout:
                output_file_handle.close()

            return total_metrics, total_quality
コード例 #10
0
def compare_ull_files(test_path, ref_file, verbose,
                      ignore_left_wall) -> ParseQuality:
    """
    Initiate evaluation process for one or multiple files.

    :param test_path: Path to file(s) to be tested.
    :param ref_file: Path to reference file(s).
    :param verbose: Boolean value which enables intermediate result output if set to True.
    :param ignore_left_wall: Boolean value which tells the script to ignore LEFT-WALL and period links if set to True.
    :return: ParseQuality class instance holding parse quality results for the whole corpus (all files if test_path is
                a directory name).
    """
    total_parse_quality = ParseQuality()
    total_file_count = 0

    def evaluate(test_file):
        """ Callback evaluation function invoked for each test file """

        nonlocal total_parse_quality
        nonlocal total_file_count

        print("\nComparing parses:")
        print("-----------------")
        print("File being tested: '" + test_file + "'")
        print("Reference file   : '" + ref_file + "'")

        # Keep a trailing "2" marker on the statistics file name so results
        # from differently suffixed test files do not clash.
        suffix = "" if test_file[-1] != "2" else "2"

        out_file = test_file + ".stat" + suffix

        print("Result file      : '" + out_file + "'")

        # Append to an existing statistics file; create it otherwise.
        mode = "a" if os.path.isfile(out_file) else "w"

        try:
            ref_data = load_ull_file(ref_file)
            ref_parses = get_parses(ref_data, ignore_left_wall)

            test_data = load_ull_file(test_file)
            test_parses = get_parses(test_data, ignore_left_wall)

            with open(out_file, mode) as ofile:
                print("Reference file   : '" + ref_file + "'", file=ofile)

                total_parse_quality += eval_parses(test_parses, ref_parses,
                                                   verbose, ofile)
                total_file_count += 1

        except IOError as err:
            print("IOError: " + str(err))

        except Exception as err:
            print("Exception: " + str(err))

    try:
        # If specified name is a file.
        if os.path.isfile(test_path):
            evaluate(test_path)

        # If specified name is a directory.
        elif os.path.isdir(test_path):
            traverse_dir(test_path, ".ull2", evaluate, None, True)

        # If file or directory does not exist. The original code raised a bare
        # string, which itself raises TypeError in Python 3; raise a proper
        # exception instead (it is caught and reported below).
        else:
            raise FileNotFoundError("Error: File or directory '" + test_path +
                                    "' does not exist.")

        # Average the accumulated quality over all processed files.
        if total_file_count > 1:
            total_parse_quality /= float(total_file_count)

    except IOError as err:
        print("IOError: " + str(err))

    except Exception as err:
        print("Exception: " + str(err))

    # NOTE(review): 'finally: return' deliberately swallows exceptions after
    # printing them, always returning the accumulated quality.
    finally:
        return total_parse_quality