class Engine:
    """Drive training and batched translation of in-memory segment lists.

    The pre/post-processing pipelines are built from ``config.json`` in the
    current working directory.  The model object ``p`` passed to the methods
    must expose ``train(src, tgt)`` and ``translate(list_of_segments)``.
    """

    def __init__(self):
        """Load ``config.json`` and build the source and target pipelines."""
        with open('config.json', 'r') as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config['pipeline_config'],
            self._config['src_lang'],
            self._config['tgt_lang'],
        )
        self._tgt_pipeline = Pipeline(
            self._config['pipeline_config_tgt'],
            self._config['tgt_lang'],
        )

    def train(self, p, src_text, tgt_text):
        """Train ``p`` on every (source, target) pair and return it.

        Args:
            p: Model object exposing ``train(src, tgt)``.
            src_text: Indexable sequence of source segments.
            tgt_text: Indexable sequence of target segments, aligned with
                ``src_text`` by position.

        Returns:
            The same ``p`` that was passed in, after training.

        Raises:
            IndexError: If ``tgt_text`` is shorter than ``src_text``.
        """
        for i, src in enumerate(src_text):
            if (i + 1) % 10 == 0:
                print(f'Trained with {i+1} segments.')
            # Index the targets explicitly so that a too-short ``tgt_text``
            # fails loudly instead of being silently truncated.
            tgt = tgt_text[i]
            src_prep = self._src_pipeline.preprocess_str(src)
            tgt_prep = self._tgt_pipeline.preprocess_str(tgt)
            p.train(src_prep, tgt_prep)
        return p

    def translate(self, p, src_text, tgt_text, batch_size=30):
        """Translate ``src_text`` in batches and return the post-processed text.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; each
                returned item must have a ``tgt`` attribute holding an
                iterable of string tokens.
            src_text: Sliceable sequence of source segments.
            tgt_text: Unused; kept so existing callers keep working.
            batch_size: Number of segments sent to ``p.translate`` per call.

        Returns:
            A list with one post-processed translation per result returned
            by ``p.translate``, in order.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1')

        translations_post = []
        for start in range(0, len(src_text), batch_size):
            # Slicing clamps at the end of the sequence, so the final
            # (possibly shorter) batch needs no special casing.
            segs = src_text[start:start + batch_size]
            segs_prep = [
                self._src_pipeline.preprocess_str(seg) for seg in segs
            ]
            for trans in p.translate(segs_prep):
                tgt = ' '.join(trans.tgt)
                # NOTE(review): post-processing goes through the *source*
                # pipeline (the one built with both language codes), exactly
                # as before -- confirm this is intended.
                translations_post.append(
                    self._src_pipeline.postprocess_str(tgt)
                )
            # Report progress once per batch.  The previous check,
            # ``(i + 1) % 30 == 0`` with ``i`` stepping by 30, never fired.
            print(f'Translated {start + len(segs)} segments.')
        return translations_post
class Engine:
    """Train and run a translation model over line-aligned files in ``data/``.

    The pre/post-processing pipelines are built from ``config.json`` in the
    current working directory.  ``in_file`` and ``ref_file`` are file names
    resolved relative to the ``data`` directory.
    """

    def __init__(self, in_file, ref_file):
        """Load ``config.json``, build both pipelines and resolve file paths.

        Args:
            in_file: Name of the source-segment file inside ``data/``.
            ref_file: Name of the reference (target) file inside ``data/``.
        """
        with open('config.json', 'r') as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config['pipeline_config'],
            self._config['src_lang'],
            self._config['tgt_lang'],
        )
        self._tgt_pipeline = Pipeline(
            self._config['pipeline_config_tgt'],
            self._config['tgt_lang'],
        )
        self._in_file = os.path.join('data', in_file)
        self._ref_file = os.path.join('data', ref_file)

    def train(self):
        """Create a ``Pangeanmt('.')`` model, train it line by line, return it.

        Source and reference files are read in lockstep, one segment per
        line.  Iteration stops at the end of the shorter file; previously a
        short reference file made ``readline()`` return ``''`` and the
        remaining source lines were trained against empty targets.

        Returns:
            The trained model object.
        """
        p = Pangeanmt('.')
        with open(self._in_file, 'r') as src_file, open(self._ref_file, 'r') as tgt_file:
            for seg, tgt_seg in zip(src_file, tgt_file):
                src = self._src_pipeline.preprocess_str(seg)
                tgt = self._tgt_pipeline.preprocess_str(tgt_seg)
                p.train(src, tgt)
        return p

    def translate_file(self, p, output_file):
        """Translate the input file line by line into ``output_file``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; the
                first returned item must have a ``tgt`` attribute holding an
                iterable of string tokens.
            output_file: Path of the file to (over)write, one translation
                per line.
        """
        # Plain 'w' is enough: the output file is only ever written to.
        with open(self._in_file, 'r') as in_file, open(output_file, 'w') as out_file:
            for seg in in_file:
                seg_prep = self._src_pipeline.preprocess_str(seg)
                translation = p.translate([seg_prep])
                tgt = ' '.join(translation[0].tgt)
                # NOTE(review): post-processing goes through the *source*
                # pipeline, exactly as before -- confirm this is intended.
                tgt = self._src_pipeline.postprocess_str(tgt)
                out_file.write(f'{tgt}\n')

    def gen_config(self, alpha):
        """Back up ``config.json`` and write a copy with a new learning rate.

        The existing file is renamed to ``<old learning rate>_config.json``
        before the updated configuration is written to ``config.json``.

        Args:
            alpha: New value stored under ``opts.learning_rate``.
        """
        # Read the old rate first: it names the backup file.
        lr = self._config['opts']['learning_rate']
        os.rename('config.json', f'{lr}_config.json')
        self._config['opts']['learning_rate'] = alpha
        with open('config.json', 'w') as config_file:
            json.dump(self._config, config_file)
class Engine:
    """Train and run a translation model over a two-column table.

    The pre/post-processing pipelines are built from ``config.json`` in the
    current working directory.  A ``table`` only needs ``shape[0]`` (row
    count) and positional ``iat[row, col]`` access; column 0 is read as the
    source segment and column 1 as the target segment.
    """

    def __init__(self):
        """Load ``config.json`` and build the source and target pipelines."""
        with open("config.json", "r") as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config["pipeline_config"],
            self._config["src_lang"],
            self._config["tgt_lang"],
        )
        self._tgt_pipeline = Pipeline(
            self._config["pipeline_config_tgt"], self._config["tgt_lang"]
        )

    def train_from_table(self, p, table):
        """Train ``p`` on every row of ``table``.

        Args:
            p: Model object exposing ``train(src, tgt)``.
            table: Table whose column 0 holds sources and column 1 targets.

        Returns:
            A ``(p, res)`` tuple where ``res`` maps ``"src_prep"`` and
            ``"tgt_prep"`` to the lists of pre-processed segments, in row
            order.
        """
        res = {"src_prep": [], "tgt_prep": []}
        for i in range(table.shape[0]):
            if (i + 1) % 10 == 0:
                print(f"Trained with {i+1} segments.")
            src_prep = self._src_pipeline.preprocess_str(table.iat[i, 0])
            tgt_prep = self._tgt_pipeline.preprocess_str(table.iat[i, 1])
            res["src_prep"].append(src_prep)
            res["tgt_prep"].append(tgt_prep)
            p.train(src_prep, tgt_prep)
        return p, res

    def _translate_table(self, p, table):
        """Translate column 0 of ``table`` row by row; return the outputs."""
        translations = []
        for i in range(table.shape[0]):
            if (i + 1) % 10 == 0:
                print(f"Translated {i+1} segments.")
            seg_prep = self._src_pipeline.preprocess_str(table.iat[i, 0])
            translation = p.translate([seg_prep])
            tgt = " ".join(translation[0].tgt)
            # NOTE(review): post-processing goes through the *source*
            # pipeline, exactly as before -- confirm this is intended.
            translations.append(self._src_pipeline.postprocess_str(tgt))
        return translations

    def no_train_translate_from_table(self, p, table):
        """Translate ``table`` and return the results under ``"original"``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; the
                first returned item must have a ``tgt`` attribute holding an
                iterable of string tokens.
            table: Table whose column 0 holds the source segments.

        Returns:
            ``{"original": [translation, ...]}`` in row order.
        """
        return {"original": self._translate_table(p, table)}

    def translate_from_table(self, p, table, j):
        """Translate ``table`` and return the results under ``"tgts_<j>"``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; the
                first returned item must have a ``tgt`` attribute holding an
                iterable of string tokens.
            table: Table whose column 0 holds the source segments.
            j: Label interpolated into the result key.

        Returns:
            ``{f"tgts_{j}": [translation, ...]}`` in row order.
        """
        return {f"tgts_{j}": self._translate_table(p, table)}