def preload_externals():
    """
    Register the namespaces listed after ``--load`` on the command line.

    Does nothing when ``--load`` is absent from ``sys.argv``. Otherwise
    every argument following ``--load`` is passed to
    ``CoreFactory.add_supported_namespace``, stopping at the first
    argument that starts with ``-``.
    """
    argv = sys.argv
    if '--load' not in argv:
        return

    # Imported only once we know there is something to register.
    from benchmarkstt.factory import CoreFactory

    first_namespace = argv.index('--load') + 1
    for namespace in argv[first_namespace:]:
        if namespace.startswith('-'):
            # The next option begins here; the namespace list is over.
            break
        CoreFactory.add_supported_namespace(namespace)
class Differ(ABC):
    """
    Abstract interface for differs.

    An implementation is constructed from two inputs `a` and `b` and
    reports, through :py:meth:`get_opcodes`, how to turn `a` into `b`.
    """

    @abstractmethod
    def __init__(self, a, b):
        """
        :meta public:
        """
        raise NotImplementedError()

    @abstractmethod
    def get_opcodes(self):
        """
        Return list of 5-tuples describing how to turn `a` into `b`.

        Every tuple has the shape `(tag, i1, i2, j1, j2)`. For the first
        tuple `i1 == j1 == 0`; for each following tuple, `i1` equals the
        `i2` of the tuple before it and `j1` equals that tuple's `j2`.

        `tag` is a string with one of these meanings:

        - 'replace': `a[i1:i2]` should be replaced by `b[j1:j2]`
        - 'delete': `a[i1:i2]` should be deleted.
          Note that `j1==j2` in this case.
        - 'insert': `b[j1:j2]` should be inserted at `a[i1:i1]`.
          Note that `i1==i2` in this case.
        - 'equal': `a[i1:i2] == b[j1:j2]`
        """
        raise NotImplementedError()


# Module-level factory for Differ implementations.
# NOTE(review): the meaning of the second positional argument (False) is
# defined by CoreFactory, which is not visible here -- confirm.
factory = CoreFactory(Differ, False)
"""
Responsible for dealing with input formats and converting them to benchmarkstt
native schema
"""

from abc import ABC, abstractmethod
from benchmarkstt.factory import CoreFactory


class Input(ABC):
    """
    Abstract interface that every input format implements.
    """

    @abstractmethod
    def __iter__(self):
        """
        Each input class should be accessible as an iterator; each iteration
        should return an Item, so the input format is essentially usable and
        can be easily converted to a :py:class:`benchmarkstt.schema.Schema`

        :meta public:
        """
        raise NotImplementedError()


# Module-level factory for Input implementations.
# NOTE(review): the meaning of the second positional argument (False) is
# defined by CoreFactory, which is not visible here -- confirm.
factory = CoreFactory(Input, False)
# NOTE(review): presumably the last method of the base class whose header
# lies above this chunk -- re-indent under that class when merging.
def result(self, title, result):
    """
    :meta public:
    """
    raise NotImplementedError()


class SimpleTextBase(Output):
    """
    Output base that offers a plain-text ``print`` helper; ``result`` is
    still abstract and must be supplied by subclasses.
    """

    @staticmethod
    def print(result):
        """
        Write `result` to standard output as plain text.

        Objects exposing ``_asdict`` are converted with it first. A float
        is printed with six decimals, a ``dict`` or ``OrderedDict`` as one
        ``key: repr(value)`` line per item, anything else with a plain
        ``print``.
        """
        if hasattr(result, '_asdict'):
            result = result._asdict()

        # Exact type checks on purpose: subclasses fall through to the
        # generic print at the end.
        kind = type(result)
        if kind is float:
            print("%.6f" % (result, ))
            return

        if kind is dict or kind is OrderedDict:
            for key, value in result.items():
                print("%s: %r" % (key, value))
            return

        print(result)

    @abstractmethod
    def result(self, title, result):
        """
        :meta public:
        """
        raise NotImplementedError()


# Module-level factory for Output implementations.
factory = CoreFactory(Output)
# NOTE(review): the statements up to `_normalize` are the tail of a method
# whose header lies above this chunk (they use `self`, `path`, `file`,
# `encoding`, `title` and `normalizer` from it) -- re-indent under that
# method when merging.
if path is not None:
    file = os.path.join(path, file)

with open(file, encoding=encoding) as f:
    self._normalizer = NormalizationAggregate(title=title)
    # Keep a handle on the reader: it tracks the current line number,
    # whereas the rows it yields are plain lists without that information.
    reader = csv.reader(f)
    for line in reader:
        try:
            # Each CSV row supplies the positional arguments of one
            # normalizer instance.
            self._normalizer.add(normalizer(*line))
        except TypeError as e:
            # Report file and line of the offending row; chain the
            # original TypeError so its traceback is preserved.
            raise ValueError("%s:%d %r(%r) %r" %
                             (file, reader.line_num, normalizer, line, e)) from e


# NOTE(review): presumably a method of the same class as the fragment above.
def _normalize(self, text: str) -> str:
    """Normalize `text` by delegating to the aggregated normalizer."""
    return self._normalizer.normalize(text)


class FileFactory(CoreFactory):
    """
    Factory whose ``create`` wraps the looked-up class in a ``File``.
    """

    def create(self, name, file=None, encoding=None, path=None):
        """
        Look up the class registered under `name` and return
        ``File(cls, file, encoding, path=path)``.
        """
        cls = super().__getitem__(name)
        return File(cls, file, encoding, path=path)

    def __getitem__(self, item):
        """
        Item access is disabled on this factory.

        :raises NotImplementedError: always
        :meta public:
        """
        raise NotImplementedError("Not supported")


factory = CoreFactory(_NormalizerNoLogs)
file_factory = FileFactory(NormalizerWithFileSupport, False)
"""
Responsible for calculating metrics.
"""

from abc import ABC, abstractmethod
from benchmarkstt.schema import Schema
from benchmarkstt.factory import CoreFactory


class Metric(ABC):
    """
    Base class for metrics
    """

    @abstractmethod
    def compare(self, ref: Schema, hyp: Schema):
        """
        Compare two schemas; concrete metrics must override this.

        :param ref: presumably the reference schema -- confirm
        :param hyp: presumably the hypothesis schema -- confirm
        """
        raise NotImplementedError()


# Module-level factory for Metric implementations.
factory = CoreFactory(Metric)
"""
Responsible for segmenting text.
"""

from abc import ABC, abstractmethod
from benchmarkstt.factory import CoreFactory


class Segmenter(ABC):
    """
    Abstract interface that every segmenter implements.
    """

    @abstractmethod
    def __iter__(self):
        """
        Each segmentation class should be accessible as an iterator; each
        iteration should return an Item, so the input format is essentially
        usable and can be easily converted to a
        :py:class:`benchmarkstt.schema.Schema`

        :meta public:
        """
        raise NotImplementedError()


# Module-level factory for Segmenter implementations.
# NOTE(review): the meaning of the second positional argument (False) is
# defined by CoreFactory, which is not visible here -- confirm.
factory = CoreFactory(Segmenter, False)
class Differ(ABC):
    """
    Abstract interface for differs.

    An implementation is constructed from two inputs `a` and `b` and
    reports, through :py:meth:`get_opcodes`, how to turn `a` into `b`.
    """

    @abstractmethod
    def __init__(self, a, b):
        """
        :meta public:
        """
        raise NotImplementedError()

    @abstractmethod
    def get_opcodes(self):
        """
        Return list of 5-tuples describing how to turn `a` into `b`.

        Every tuple has the shape `(tag, i1, i2, j1, j2)`. For the first
        tuple `i1 == j1 == 0`; for each following tuple, `i1` equals the
        `i2` of the tuple before it and `j1` equals that tuple's `j2`.

        `tag` is a string with one of these meanings:

        - 'replace': `a[i1:i2]` should be replaced by `b[j1:j2]`
        - 'delete': `a[i1:i2]` should be deleted.
          Note that `j1==j2` in this case.
        - 'insert': `b[j1:j2]` should be inserted at `a[i1:i1]`.
          Note that `i1==i2` in this case.
        - 'equal': `a[i1:i2] == b[j1:j2]`
        """
        raise NotImplementedError()


# Module-level factory for Differ implementations.
factory = CoreFactory(Differ)