Exemple #1
0
def preload_externals():
    """
    Register externally supplied namespaces named after ``--load``.

    Scans ``sys.argv`` for the ``--load`` flag. Every argument that follows
    it, up to (but not including) the first one starting with ``-``, is
    handed to ``CoreFactory.add_supported_namespace``. Does nothing when
    the flag is absent.
    """
    try:
        flag_position = sys.argv.index('--load')
    except ValueError:
        # No --load on the command line: nothing to register.
        return

    # Imported lazily so the factory module is only loaded when needed.
    from benchmarkstt.factory import CoreFactory

    for namespace in sys.argv[flag_position + 1:]:
        # The next option flag ends the list of namespaces.
        if namespace.startswith('-'):
            break
        CoreFactory.add_supported_namespace(namespace)
Exemple #2
0

# Abstract interface for diff implementations: concrete subclasses must
# override both the constructor and get_opcodes.
class Differ(ABC):
    @abstractmethod
    def __init__(self, a, b):
        """
        Receive the two sequences `a` and `b` to compare; must be overridden.

        :meta public:
        """
        raise NotImplementedError

    @abstractmethod
    def get_opcodes(self):
        """
        Return the list of 5-tuples that describes how `a` is turned into `b`.

        Every tuple has the shape `(tag, i1, i2, j1, j2)`. For the first tuple
        `i1 == j1 == 0`; for each later tuple, `i1` equals the `i2` of the
        tuple before it and `j1` equals the `j2` of the tuple before it.

        `tag` is a string with one of these meanings:

         - 'replace': `a[i1:i2]` should be replaced by `b[j1:j2]`
         - 'delete': `a[i1:i2]` should be deleted (`j1==j2` in this case)
         - 'insert': `b[j1:j2]` should be inserted at `a[i1:i1]` (`i1==i2` in this case)
         - 'equal': `a[i1:i2] == b[j1:j2]`
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Differ base class.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible here -- confirm against CoreFactory's signature.
factory = CoreFactory(Differ, False)
Exemple #3
0
"""
Responsible for dealing with input formats and converting them to benchmarkstt native schema

"""

from abc import ABC, abstractmethod
from benchmarkstt.factory import CoreFactory


# Abstract interface for input formats: the only requirement on a concrete
# subclass is that it can be iterated.
class Input(ABC):
    @abstractmethod
    def __iter__(self):
        """
        Iterate over the input, yielding one Item per iteration.

        Every input class must be usable as an iterator in this way, which
        makes any input format readily usable and easy to convert to a
        :py:class:`benchmarkstt.schema.Schema`.

        :meta public:
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Input base class.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible here -- confirm against CoreFactory's signature.
factory = CoreFactory(Input, False)
Exemple #4
0
    def result(self, title, result):
        """
        Handle one `result` together with its `title`; must be overridden.

        :meta public:
        """
        raise NotImplementedError


# Output base class that adds a plain-text print helper; `result` itself is
# still abstract and has to be supplied by subclasses.
class SimpleTextBase(Output):
    @staticmethod
    def print(result):
        """
        Write `result` to stdout in a simple textual form.

        Objects exposing `_asdict` (e.g. namedtuples) are converted with it
        first. Floats are printed with six decimals, plain dicts and
        OrderedDicts as one ``key: repr(value)`` line per entry, and anything
        else is passed to the builtin ``print`` unchanged.
        """
        value = result._asdict() if hasattr(result, '_asdict') else result
        kind = type(value)

        if kind is float:
            print("%.6f" % (value, ))
            return

        # Exact type match on purpose: other mapping types fall through to
        # the generic print below.
        if kind is not dict and kind is not OrderedDict:
            print(value)
            return

        for key, entry in value.items():
            print("%s: %r" % (key, entry))

    @abstractmethod
    def result(self, title, result):
        """
        Handle one `result` together with its `title`; must be overridden.

        :meta public:
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Output base class.
factory = CoreFactory(Output)
Exemple #5
0
        if path is not None:
            file = os.path.join(path, file)

        with open(file, encoding=encoding) as f:
            self._normalizer = NormalizationAggregate(title=title)
            for line in csv.reader(f):
                try:
                    self._normalizer.add(normalizer(*line))
                except TypeError as e:
                    raise ValueError("%s:%d %r(%r) %r" %
                                     (file, line.lineno, normalizer, line, e))

    def _normalize(self, text: str) -> str:
        return self._normalizer.normalize(text)


# CoreFactory variant whose `create` wraps the looked-up class in a File;
# direct item access is deliberately disabled.
class FileFactory(CoreFactory):
    def __getitem__(self, item):
        """
        Item access is not available on this factory.

        :meta public:
        """
        raise NotImplementedError("Not supported")

    def create(self, name, file=None, encoding=None, path=None):
        """
        Look up the class registered as `name` and return a File built on it.

        `file`, `encoding` and `path` are forwarded to File unchanged.
        """
        # Use the parent lookup: this class's own __getitem__ always raises.
        normalizer_cls = super().__getitem__(name)
        return File(normalizer_cls, file, encoding, path=path)


# Module-level factories: a CoreFactory around _NormalizerNoLogs and a
# FileFactory around NormalizerWithFileSupport.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible here -- confirm against CoreFactory's signature.
factory = CoreFactory(_NormalizerNoLogs)
file_factory = FileFactory(NormalizerWithFileSupport, False)
Exemple #6
0
"""
Responsible for calculating metrics.

"""

from abc import ABC, abstractmethod
from benchmarkstt.schema import Schema
from benchmarkstt.factory import CoreFactory


class Metric(ABC):
    """
    Abstract base class that every metric derives from.
    """

    @abstractmethod
    def compare(self, ref: Schema, hyp: Schema):
        """
        Compare the two schemas `ref` and `hyp`; must be overridden.

        NOTE(review): presumably `ref` is the reference and `hyp` the
        hypothesis transcript -- confirm against the concrete metrics.
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Metric base class.
factory = CoreFactory(Metric)
Exemple #7
0
"""
Responsible for segmenting text.
"""

from abc import ABC, abstractmethod
from benchmarkstt.factory import CoreFactory


# Abstract interface for text segmenters: the only requirement on a concrete
# subclass is that it can be iterated.
class Segmenter(ABC):
    @abstractmethod
    def __iter__(self):
        """
        Iterate over the segments, yielding one Item per iteration.

        Every segmentation class must be usable as an iterator in this way,
        which makes its output readily usable and easy to convert to a
        :py:class:`benchmarkstt.schema.Schema`.

        :meta public:
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Segmenter base class.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible here -- confirm against CoreFactory's signature.
factory = CoreFactory(Segmenter, False)
Exemple #8
0

# Abstract interface for diff implementations: concrete subclasses must
# override both the constructor and get_opcodes.
class Differ(ABC):
    @abstractmethod
    def __init__(self, a, b):
        """
        Receive the two sequences `a` and `b` to compare; must be overridden.

        :meta public:
        """
        raise NotImplementedError

    @abstractmethod
    def get_opcodes(self):
        """
        Return the list of 5-tuples that describes how `a` is turned into `b`.

        Every tuple has the shape `(tag, i1, i2, j1, j2)`. For the first tuple
        `i1 == j1 == 0`; for each later tuple, `i1` equals the `i2` of the
        tuple before it and `j1` equals the `j2` of the tuple before it.

        `tag` is a string with one of these meanings:

         - 'replace': `a[i1:i2]` should be replaced by `b[j1:j2]`
         - 'delete': `a[i1:i2]` should be deleted (`j1==j2` in this case)
         - 'insert': `b[j1:j2]` should be inserted at `a[i1:i1]` (`i1==i2` in this case)
         - 'equal': `a[i1:i2] == b[j1:j2]`
        """
        raise NotImplementedError


# Module-level CoreFactory instance built around the Differ base class.
factory = CoreFactory(Differ)