Exemple #1
0
class TranslatorInterface():
    """An interface to a single, possibly multilingual, model.

    Wraps a shared ``model`` worker together with a ContentProcessor that is
    configured from the ``sourcebpe``/``targetbpe``/``sourcespm``/``targetspm``
    entries of ``service``.
    """

    def __init__(self, srclang, targetlang, service, model):
        """Store the service definition and worker and build the processor.

        Args:
            srclang: source language, forwarded to ContentProcessor.
            targetlang: target language, forwarded to ContentProcessor.
            service: mapping with the optional segmentation model entries.
            model: worker object used for translation; may be None.
        """
        self.service = service
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm'))
        self.worker = model
        # becomes nonempty if there are multiple target languages
        self.preamble = ""

    def translate(self, text):
        """Translate ``text`` and return the result as one string.

        The text is preprocessed into sentences, sent to the worker as a
        single newline-joined payload prefixed with ``self.preamble``, and
        the postprocessed output is joined with single spaces.
        """
        sentences = self.contentprocessor.preprocess(text)
        payload = self.preamble + '\n'.join(sentences)
        translated_sentences = self.worker.translate(payload)
        translation = self.contentprocessor.postprocess(translated_sentences)
        return ' '.join(translation)

    def ready(self):
        """Return False without a worker, else the worker's ``ready()``."""
        # Identity test: `!= None` would dispatch to a worker-defined
        # __ne__/__eq__ and could misreport a perfectly valid worker.
        return self.worker is not None and self.worker.ready()

    def on_exit(self):
        """Forward the exit notification to the worker, if there is one."""
        if self.worker is not None:
            self.worker.on_exit()
Exemple #2
0
class TranslatorWorker():
    """Starts a ``marian-server`` process and translates over its websocket.

    The service definition supplies ``host``, ``port`` and ``configuration``
    plus the optional segmentation model entries used by ContentProcessor.
    """

    def __init__(self, srclang, targetlang, service):
        """Build the content processor and launch the server if configured.

        Args:
            srclang: source language, forwarded to ContentProcessor.
            targetlang: target language, forwarded to ContentProcessor.
            service: mapping with ``host``, ``port``, ``configuration`` and
                optional ``sourcebpe``/``targetbpe``/``sourcespm``/
                ``targetspm`` entries.
        """
        self.q = queues.Queue()
        # Service definition
        self.service = service
        self.p = None
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm'))
        self.ws_url = "ws://{}:{}/translate".format(self.service['host'],
                                                    self.service['port'])
        # Only spawn a server when a configuration is given.
        if self.service['configuration']:
            self.run()

    @gen.coroutine
    def run(self):
        """Spawn ``marian-server`` and wait until the process exits."""
        process.Subprocess.initialize()
        self.p = process.Subprocess([
            'marian-server',
            '-c',
            self.service['configuration'],
            '-p',
            # argv entries must be strings; the port may be a number when the
            # service definition is loaded from JSON.
            str(self.service['port']),
            '--allow-unk',
            # enables translation with a mini-batch size of 64, i.e. translating 64 sentences at once, with a beam-size of 6.
            '-b',
            '6',
            '--mini-batch',
            '64',
            # use a length-normalization weight of 0.6 (this usually increases BLEU a bit).
            '--normalize',
            '0.6',
            '--maxi-batch-sort',
            'src',
            '--maxi-batch',
            '100',
        ])
        self.p.set_exit_callback(self.on_exit)
        yield self.p.wait_for_exit()

    def on_exit(self):
        """Exit callback registered on the subprocess; only logs."""
        print("Process exited")

    def translate(self, srctxt):
        """Translate ``srctxt`` and return the result as one string.

        All preprocessed sentences are sent in one newline-joined message;
        the single reply is split on newlines, postprocessed and joined with
        spaces.
        """
        ws = websocket.create_connection(self.ws_url)
        try:
            sentences = self.contentprocessor.preprocess(srctxt)
            ws.send('\n'.join(sentences))
            translated_sentences = ws.recv().split('\n')
        finally:
            # Close the connection even if preprocessing or the exchange
            # fails, so a failed request does not leak the socket.
            ws.close()
        translation = self.contentprocessor.postprocess(translated_sentences)
        return ' '.join(translation)
Exemple #3
0
class TranslatorWorker():
    """Starts a ``marian-server`` process and translates over its websocket.

    The service definition supplies ``host``, ``port`` and ``configuration``
    plus the optional segmentation model entries used by ContentProcessor.
    """

    def __init__(self, srclang, targetlang, service):
        """Build the content processor and launch the server if configured.

        Args:
            srclang: source language, forwarded to ContentProcessor.
            targetlang: target language, forwarded to ContentProcessor.
            service: mapping with ``host``, ``port``, ``configuration`` and
                optional ``sourcebpe``/``targetbpe``/``sourcespm``/
                ``targetspm`` entries.
        """
        self.q = queues.Queue()
        # Service definition
        self.service = service
        self.p = None
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm')
        )
        self.ws_url = "ws://{}:{}/translate".format(
            self.service['host'], self.service['port'])
        # Only spawn a server when a configuration is given.
        if self.service['configuration']:
            self.run()

    @gen.coroutine
    def run(self):
        """Spawn ``marian-server`` and wait until the process exits."""
        process.Subprocess.initialize()
        self.p = process.Subprocess(['marian-server', '-c',
                                     self.service['configuration'],
                                     '--quiet-translation',
                                     # argv entries must be strings; the port
                                     # may be a number when loaded from JSON.
                                     '-p', str(self.service['port'])])
        self.p.set_exit_callback(self.on_exit)
        yield self.p.wait_for_exit()

    def on_exit(self):
        """Exit callback registered on the subprocess; only logs."""
        print("Process exited")

    def translate(self, srctxt):
        """Translate ``srctxt`` and return the result as one string.

        Each preprocessed sentence is sent as its own message and the reply
        is read before the next one is sent; the replies are postprocessed
        and joined with spaces.
        """
        ws = websocket.create_connection(self.ws_url)
        try:
            sentences = self.contentprocessor.preprocess(srctxt)
            translated_sentences = []
            for sentence in sentences:
                ws.send(sentence)
                translated_sentences.append(ws.recv())
        finally:
            # Close the connection even if preprocessing or an exchange
            # fails, so a failed request does not leak the socket.
            ws.close()
        translation = self.contentprocessor.postprocess(translated_sentences)
        return ' '.join(translation)
Exemple #4
0
        y = y.strip()
        z = z.strip()
        pairs.append((x, y, z))
# Filter out the sentences with fewer than 5 tokens or more than 120 tokens.
# Only the first element of each pair is measured. Slice assignment filters
# in one pass while keeping the same list object (repeated pop() from the
# middle of the list is quadratic).
pairs[:] = [pair for pair in pairs if 4 < len(pair[0].split()) <= 120]

# Load preprocessor settings for this language pair from the service file.
with open("service.json", 'r') as configfile:
    services = json.load(configfile)
config = services[src][trg]
contentprocessor = ContentProcessor(src,
                                    trg,
                                    sourcebpe=config.get('sourcebpe'),
                                    targetbpe=config.get('targetbpe'),
                                    sourcespm=config.get('sourcespm'),
                                    targetspm=config.get('targetspm'))

# Append the kept pairs to the CSV, score column first.
# NOTE(review): the file is opened in append mode, so the header row is
# written again on every run - confirm this is intended.
# NOTE(review): the third tuple element is treated as the score here; verify
# against the order in which the tuples are built above.
with open("{}_en_pairs.csv".format(src), "a", newline='') as datacsv:
    csvwriter = csv.writer(datacsv, dialect="excel")
    csvwriter.writerow(["score", src, "en"])
    csvwriter.writerows([score, s, t] for s, t, score in pairs)

# One output line per pair: every preprocessed piece followed by a space.
sentences = [contentprocessor.preprocess(pair[0]) for pair in pairs]
with open('input_{}.txt'.format(src), 'w') as f:
    f.writelines(
        ''.join(piece + ' ' for piece in pieces) + '\n'
        for pieces in sentences)