class TranslatorInterface():
    """An interface to a single, possibly multilingual, model.

    Text is split into sentences by a ``ContentProcessor``, handed to the
    wrapped ``model`` object for translation, and post-processed back into
    a single string.
    """

    def __init__(self, srclang, targetlang, service, model):
        """Store the service definition and build the content processor.

        Args:
            srclang: Source language, passed through to ``ContentProcessor``.
            targetlang: Target language, passed through to
                ``ContentProcessor``.
            service: Mapping with the service definition; the optional keys
                ``sourcebpe``, ``targetbpe``, ``sourcespm`` and ``targetspm``
                are read with ``.get`` and forwarded to ``ContentProcessor``.
            model: Worker object that performs the translation. It may be
                ``None``; otherwise it must provide ``translate(text)``,
                ``ready()`` and ``on_exit()``.
        """
        self.service = service
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm'),
        )
        self.worker = model
        # becomes nonempty if there are multiple target languages
        self.preamble = ""

    def translate(self, text):
        """Translate ``text`` and return the result as one string.

        The preprocessed sentences are joined with newlines, prefixed with
        ``self.preamble`` and sent to the worker in a single call; the
        post-processed pieces are joined with single spaces.
        """
        sentences = self.contentprocessor.preprocess(text)
        translatedSentences = self.worker.translate(
            self.preamble + '\n'.join(sentences))
        translation = self.contentprocessor.postprocess(translatedSentences)
        return ' '.join(translation)

    def ready(self):
        """Return ``False`` without a worker, else the worker's ``ready()``."""
        # Identity test: ``!= None`` would dispatch to a custom ``__ne__`` /
        # ``__eq__`` on the worker and could misreport it as missing.
        return self.worker is not None and self.worker.ready()

    def on_exit(self):
        """Forward the shutdown notification to the worker, if there is one."""
        if self.worker is not None:
            self.worker.on_exit()
class TranslatorWorker():
    """Run a ``marian-server`` subprocess and translate over its websocket.

    All sentences of a request are sent to the server in one
    newline-separated message.
    """

    def __init__(self, srclang, targetlang, service):
        """Build the content processor and, if configured, start the server.

        Args:
            srclang: Source language, passed through to ``ContentProcessor``.
            targetlang: Target language, passed through to
                ``ContentProcessor``.
            service: Mapping with the service definition. ``host``, ``port``
                and ``configuration`` are required keys; ``sourcebpe``,
                ``targetbpe``, ``sourcespm`` and ``targetspm`` are optional.
                The server is only launched when ``configuration`` is truthy.
        """
        self.q = queues.Queue()
        # Service definition
        self.service = service
        self.p = None
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm'),
        )
        self.ws_url = "ws://{}:{}/translate".format(
            self.service['host'], self.service['port'])
        if self.service['configuration']:
            self.run()

    @gen.coroutine
    def run(self):
        """Start ``marian-server`` and report when the process has exited."""
        process.Subprocess.initialize()
        self.p = process.Subprocess([
            'marian-server',
            '-c', self.service['configuration'],
            # argv entries must be strings; the port may be stored as an int.
            '-p', str(self.service['port']),
            '--allow-unk',
            # enables translation with a mini-batch size of 64, i.e.
            # translating 64 sentences at once, with a beam-size of 6.
            '-b', '6',
            '--mini-batch', '64',
            # use a length-normalization weight of 0.6 (this usually
            # increases BLEU a bit).
            '--normalize', '0.6',
            '--maxi-batch-sort', 'src',
            '--maxi-batch', '100',
        ])
        # wait_for_exit() installs its own exit callback, replacing any
        # callback registered earlier with set_exit_callback(), so on_exit is
        # invoked explicitly here. raise_error=False makes a non-zero exit
        # status come back as a value instead of raising CalledProcessError
        # inside this fire-and-forget coroutine.
        returncode = yield self.p.wait_for_exit(raise_error=False)
        self.on_exit(returncode)

    def on_exit(self, returncode=None):
        """Report that the server process has exited.

        Args:
            returncode: Exit status of the process when known. Optional so
                the method works both as a Tornado exit callback (which
                receives the return code) and when called with no arguments.
        """
        print("Process exited")

    def translate(self, srctxt):
        """Translate ``srctxt`` and return the result as one string.

        The preprocessed sentences are sent as a single newline-joined
        message; the reply is split on newlines, post-processed and joined
        with single spaces.
        """
        sentences = self.contentprocessor.preprocess(srctxt)
        ws = websocket.create_connection(self.ws_url)
        try:
            ws.send('\n'.join(sentences))
            translatedSentences = ws.recv().split('\n')
        finally:
            # Close the connection even when send/recv fails.
            ws.close()
        translation = self.contentprocessor.postprocess(translatedSentences)
        return ' '.join(translation)
class TranslatorWorker():
    """Run a ``marian-server`` subprocess and translate over its websocket.

    Sentences are sent to the server one message at a time, each followed
    by reading one reply.
    """

    def __init__(self, srclang, targetlang, service):
        """Build the content processor and, if configured, start the server.

        Args:
            srclang: Source language, passed through to ``ContentProcessor``.
            targetlang: Target language, passed through to
                ``ContentProcessor``.
            service: Mapping with the service definition. ``host``, ``port``
                and ``configuration`` are required keys; ``sourcebpe``,
                ``targetbpe``, ``sourcespm`` and ``targetspm`` are optional.
                The server is only launched when ``configuration`` is truthy.
        """
        self.q = queues.Queue()
        # Service definition
        self.service = service
        self.p = None
        self.contentprocessor = ContentProcessor(
            srclang,
            targetlang,
            sourcebpe=self.service.get('sourcebpe'),
            targetbpe=self.service.get('targetbpe'),
            sourcespm=self.service.get('sourcespm'),
            targetspm=self.service.get('targetspm'),
        )
        self.ws_url = "ws://{}:{}/translate".format(
            self.service['host'], self.service['port'])
        if self.service['configuration']:
            self.run()

    @gen.coroutine
    def run(self):
        """Start ``marian-server`` and report when the process has exited."""
        process.Subprocess.initialize()
        self.p = process.Subprocess([
            'marian-server',
            '-c', self.service['configuration'],
            '--quiet-translation',
            # argv entries must be strings; the port may be stored as an int.
            '-p', str(self.service['port']),
        ])
        # wait_for_exit() installs its own exit callback, replacing any
        # callback registered earlier with set_exit_callback(), so on_exit is
        # invoked explicitly here. raise_error=False makes a non-zero exit
        # status come back as a value instead of raising CalledProcessError
        # inside this fire-and-forget coroutine.
        returncode = yield self.p.wait_for_exit(raise_error=False)
        self.on_exit(returncode)

    def on_exit(self, returncode=None):
        """Report that the server process has exited.

        Args:
            returncode: Exit status of the process when known. Optional so
                the method works both as a Tornado exit callback (which
                receives the return code) and when called with no arguments.
        """
        print("Process exited")

    def translate(self, srctxt):
        """Translate ``srctxt`` and return the result as one string.

        Each preprocessed sentence is sent as its own websocket message and
        exactly one reply is read for it; the replies are post-processed and
        joined with single spaces.
        """
        sentences = self.contentprocessor.preprocess(srctxt)
        translatedSentences = []
        ws = websocket.create_connection(self.ws_url)
        try:
            for sentence in sentences:
                ws.send(sentence)
                translatedSentences.append(ws.recv())
        finally:
            # Close the connection even when send/recv fails mid-request.
            ws.close()
        translation = self.contentprocessor.postprocess(translatedSentences)
        return ' '.join(translation)
y = y.strip() z = z.strip() pairs.append((x, y, z)) # Filer out the sentences with less than 5 tokens or larger than 120 tokens for i in range(len(pairs) - 1, -1, -1): if len(pairs[i][0].split()) > 120 or len(pairs[i][0].split()) <= 4: pairs.pop(i) # Load preprocessor services = {} with open("service.json", 'r') as configfile: services = json.load(configfile) config = services[src][trg] contentprocessor = ContentProcessor(src, trg, sourcebpe=config.get('sourcebpe'), targetbpe=config.get('targetbpe'), sourcespm=config.get('sourcespm'), targetspm=config.get('targetspm')) with open("{}_en_pairs.csv".format(src), "a", newline='') as datacsv: csvwriter = csv.writer(datacsv, dialect=("excel")) csvwriter.writerow(["score", src, "en"]) for s, t, score in pairs: csvwriter.writerow([score, s, t]) sentences = [contentprocessor.preprocess(pair[0]) for pair in pairs] with open('input_{}.txt'.format(src), 'w') as f: for _list in sentences: for _string in _list: f.write(_string + ' ') f.write('\n')