def test_builtinmapper_invalid(self):
    """Requesting an unknown builtin scheme must raise
    BuiltinCharMapNotFoundError.
    """

    unknown_scheme = 'hello'

    with pytest.raises(BuiltinCharMapNotFoundError):
        CharMapper.builtin_mapper(unknown_scheme)
Example #2
0
def main():  # pragma: no cover
    """Command-line entry point for the 'arclean' cleaning tool.

    Parses the command line with docopt (using the module docstring as the
    usage text), opens the input/output streams, and runs `_arclean` with
    the builtin 'arclean' character mapper.

    The process always terminates through `sys.exit`:
      * 0 when cleaning completes,
      * 1 when cleaning fails, on a keyboard interrupt, or on any other
        unexpected error.
    """
    try:
        version = ('CAMeL Tools v{}'.format(__version__))
        arguments = docopt(__doc__, version=version)

        # Open files (or just use stdin and stdout)
        fin, fout = _open_files(arguments['FILE'], arguments['--output'])

        try:
            mapper = CharMapper.builtin_mapper('arclean')
            _arclean(mapper, fin, fout)

        # If everything worked so far, this shouldn't happen
        except Exception:
            sys.stderr.write('Error: An error occured during cleaning.\n')
            fin.close()
            fout.close()
            sys.exit(1)

        # Cleanup on the success path. This must live outside the `except`
        # handler above: placed after `sys.exit(1)` it would never run.
        # Only close streams that were opened from a path given on the
        # command line.
        if arguments['FILE'] is not None:
            fin.close()
        if arguments['--output'] is not None:
            fout.close()

        sys.exit(0)
    except KeyboardInterrupt:
        sys.stderr.write('Exiting...\n')
        sys.exit(1)
    except Exception:
        # SystemExit is not an Exception subclass, so the sys.exit calls
        # above pass through this handler untouched.
        sys.stderr.write('Error: An unknown error occurred.\n')
        sys.exit(1)
    def test_builtinmapper_arclean(self):
        """The builtin 'arclean' scheme should load and give a truthy mapper.
        """

        mapper = CharMapper.builtin_mapper('arclean')
        assert mapper
    def test_builtinmapper_xmlbw2hsb(self):
        """The builtin 'xmlbw2hsb' scheme should load and give a truthy
        mapper.
        """

        mapper = CharMapper.builtin_mapper('xmlbw2hsb')
        assert mapper
    def test_builtinmapper_hsb2xmlbw(self):
        """The builtin 'hsb2xmlbw' scheme should load and give a truthy
        mapper.
        """

        mapper = CharMapper.builtin_mapper('hsb2xmlbw')
        assert mapper
    def test_builtinmapper_safebw2bw(self):
        """The builtin 'safebw2bw' scheme should load and give a truthy
        mapper.
        """

        mapper = CharMapper.builtin_mapper('safebw2bw')
        assert mapper
    def test_builtinmapper_bw2ar(self):
        """The builtin 'bw2ar' scheme should load and give a truthy mapper.
        """

        mapper = CharMapper.builtin_mapper('bw2ar')
        assert mapper
Example #8
0
def main():  # pragma: no cover
    """Command-line entry point for the transliteration tool.

    Parses the command line with docopt (using the module docstring as the
    usage text) and then either:

      * prints every entry of `_BUILTIN_SCHEMES` when `--list` is given, or
      * transliterates the input line by line with the builtin scheme named
        by `--scheme`, writing the result to the output stream.

    The process always terminates through `sys.exit`:
      * 0 after listing schemes, after a completed transliteration, or when
        no scheme was given,
      * 1 for an invalid scheme name, a scheme that fails to load, a failure
        during transliteration, a keyboard interrupt, or any other
        unexpected error.
    """
    try:
        version = ('CAMeL Tools v{}'.format(__version__))
        arguments = docopt(__doc__, version=version)

        if arguments['--list']:
            for scheme in _BUILTIN_SCHEMES:
                print("{}   {}".format(scheme[0].ljust(20), scheme[1]))
            sys.exit(0)

        if arguments['--scheme'] is not None:
            if arguments['--scheme'] not in [s[0] for s in _BUILTIN_SCHEMES]:
                sys.stderr.write('Error: {} is not a valid scheme.\n'
                                 'Run `camel_transliterate -l` to see the list'
                                 ' of available schemes.'
                                 '\n'.format(repr(arguments['--scheme'])))
                sys.exit(1)

            if arguments['--marker'] is None:
                marker = '@@IGNORE@@'
            else:
                marker = arguments['--marker']

            ignore_markers = arguments['--ignore-markers']
            strip_markers = arguments['--strip-markers']

            # Open files (or just use stdin and stdout)
            fin, fout = _open_files(arguments['FILE'], arguments['--output'])

            # Everything that uses the streams runs inside try/finally so
            # that files opened from command-line paths are closed on the
            # error exits as well as on success (sys.exit raises SystemExit,
            # which still triggers the finally clause).
            try:
                # Load the CharMapper and initialize a Transliterator with it
                try:
                    mapper = CharMapper.builtin_mapper(arguments['--scheme'])
                    trans = Transliterator(mapper, marker)
                except Exception:  # pylint: disable=W0703
                    sys.stderr.write('Error: Could not load builtin scheme'
                                     ' {}.\n'.format(
                                         repr(arguments['--scheme'])))
                    sys.exit(1)

                # Transliterate lines
                try:
                    for line in fin:
                        line = force_unicode(line)
                        result = trans.transliterate(line, strip_markers,
                                                     ignore_markers)

                        # On Python 2 the result is passed through
                        # force_encoding before being written.
                        if not six.PY3:
                            result = force_encoding(result)

                        fout.write(result)
                    fout.flush()

                # If everything worked so far, this shouldn't happen
                except Exception:  # pylint: disable=W0703
                    sys.stderr.write('Error: An unkown error occured during '
                                     'transliteration.\n')
                    sys.exit(1)
            finally:
                # Cleanup: only close streams that were opened from a path
                # given on the command line.
                if arguments['FILE'] is not None:
                    fin.close()
                if arguments['--output'] is not None:
                    fout.close()

        sys.exit(0)
    except KeyboardInterrupt:
        sys.stderr.write('Exiting...\n')
        sys.exit(1)
    except Exception:
        # SystemExit is not an Exception subclass, so the sys.exit calls
        # above pass through this handler untouched.
        sys.stderr.write('Error: An unknown error occurred.\n')
        sys.exit(1)
Example #9
0
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import subprocess
import sys
import os
import argparse
import time

from helpers.preprocess import preprocess
from helpers.tag import tag
from ai.tests.mle import train_mle, predict_mle
from camel_tools.utils.charmap import CharMapper

# Character mapper for the builtin 'ar2bw' scheme, created once at import time.
# NOTE(review): presumably Arabic script -> Buckwalter transliteration; confirm
# against the camel_tools list of builtin schemes.
ar2bw = CharMapper.builtin_mapper('ar2bw')


def is_bool(s):
    """Interpret *s* as a boolean flag value.

    Returns False only when the string form of *s* is exactly 'False'.
    Every other value (including 'false', '0' and the empty string)
    yields True.
    """
    text = str(s)
    if text == 'False':
        return False
    return True


# Module-level command-line argument parser for this script; its options are
# registered on it with parser.add_argument.
parser = argparse.ArgumentParser(
    description=
    'This program rewrites (transliterates) from one language script to another'
)

# --model_name can take values "mle", "word2word", "line2line", or "hybrid"
parser.add_argument('--model_name',
                    action="store",
                    dest='model_name',
Example #10
0
 def __init__(self) -> None:
     """Initialise the base class and load the builtin character mappers.

     Sets `sec_cleaner` to None and creates three `CharMapper` instances
     from the builtin 'arclean', 'ar2bw' and 'bw2ar' schemes.
     """
     super().__init__()
     # No cleaner is assigned here; the attribute starts out as None.
     self.sec_cleaner = None
     # Builtin mappers are loaded once per instance, at construction time.
     self.clean_mapper = CharMapper.builtin_mapper('arclean')
     self.ar2bw_mapper = CharMapper.builtin_mapper('ar2bw')
     self.bw2ar_mapper = CharMapper.builtin_mapper('bw2ar')