def main():
    """Start an interactive Moses translation session for the es→en leg."""
    cfg = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))
    Server(moses_path).translate_interactive("es-en.working")
def main():
    """Prepare the Europarl corpora: tokenize, cleanse, split, subset, match."""
    cfg = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))
    mem_limit = cfg.getint("Environment Settings", "mem_limit")
    longest = cfg.getint("Iteration Settings", "max_sentence_len")
    shortest = cfg.getint("Iteration Settings", "min_sentence_len")

    parser = Parser(moses_path, mem_limit, longest, shortest, True)

    # Tokenize each raw corpus file.
    for raw_corpus in ("data/src/europarl-v7.es-en.es",
                       "data/src/europarl-v7.es-en.en",
                       "data/src/europarl-v7.fr-en.en",
                       "data/src/europarl-v7.fr-en.fr"):
        parser.tokenize(raw_corpus)

    # Cleanse each tokenized language pair.
    parser.cleanse("data/europarl-v7.es-en.es.tok",
                   "data/europarl-v7.es-en.en.tok")
    parser.cleanse("data/europarl-v7.fr-en.en.tok",
                   "data/europarl-v7.fr-en.fr.tok")

    # 60% train / 20% tune (remainder test).
    parser.split_train_tune_test(
        "data/europarl-v7.es-en.es.tok.cleansed",
        "data/europarl-v7.es-en.en.tok.cleansed",
        "data/europarl-v7.fr-en.en.tok.cleansed",
        "data/europarl-v7.fr-en.fr.tok.cleansed",
        .6, .2)

    # Makes sense to do this to the training/tune data if the
    # training/tune data is too large.
    parser.subset("data/train/europarl-v7.es-en.es.tok.cleansed.train",
                  "data/train/europarl-v7.es-en.en.tok.cleansed.train",
                  .5, "train/")
    parser.subset("data/train/europarl-v7.fr-en.en.tok.cleansed.train",
                  "data/train/europarl-v7.fr-en.fr.tok.cleansed.train",
                  .5, "train/")

    # Necessary to do for test data to be consistent.
    parser.match("data/test/europarl-v7.es-en.es.tok.cleansed.test",
                 "data/test/europarl-v7.es-en.en.tok.cleansed.test",
                 "data/test/europarl-v7.fr-en.en.tok.cleansed.test",
                 "data/test/europarl-v7.fr-en.fr.tok.cleansed.test")
def main():
    """Tune both translation legs (es→en and en→fr) with MERT."""
    cfg = utilities.config_file_reader()
    cpu_count = cfg.getint("Environment Settings", "ncpus")
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))

    tuner = Tune(moses_path, cpu_count)
    legs = (
        ("data/tune/europarl-v7.es-en.es.tok.cleansed.tune",
         "data/tune/europarl-v7.es-en.en.tok.cleansed.tune",
         "es-en.working"),
        ("data/tune/europarl-v7.fr-en.en.tok.cleansed.tune",
         "data/tune/europarl-v7.fr-en.fr.tok.cleansed.tune",
         "en-fr.working"),
    )
    for src_file, tar_file, workdir in legs:
        tuner.tune(src_file, tar_file, workdir)
def main():
    """Score both legs' translation quality, then the pivot chain es→en→fr."""
    cfg = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))

    tester = Test(moses_path)
    # Direct quality for each leg on its own held-out test set.
    tester.test_translation_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test",
        "data/test/europarl-v7.es-en.en.tok.cleansed.test",
        "es-en.working")
    tester.test_translation_quality(
        "data/test/europarl-v7.fr-en.en.tok.cleansed.test",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test",
        "en-fr.working")
    # Pivot quality uses the matched test files so both legs share sentences.
    tester.test_pivoting_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test.matched",
        "es-en.working",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test.matched",
        "en-fr.working")
def smtp_pipeline(config):
    """Run the full pivot-translation pipeline from one config object.

    Stages: parse (tokenize/cleanse/split/match), train (LMs + both legs),
    tune (MERT, both legs), and finally a pivot-quality test across the
    matched evaluation files.

    Args:
        config: a ConfigParser-style object providing the
            "Environment Settings" and "Iteration Settings" sections.
    """
    path_to_moses = config.get("Environment Settings", "path_to_moses_decoder")
    mem_limit = config.getint("Environment Settings", "mem_limit")
    max_len = config.getint("Iteration Settings", "max_sentence_len")
    min_len = config.getint("Iteration Settings", "min_sentence_len")
    srcf = utilities.safe_string(
        config.get("Iteration Settings", "src_lang_data"))
    piv1f = utilities.safe_string(
        config.get("Iteration Settings", "src_piv_lang_data"))
    piv2f = utilities.safe_string(
        config.get("Iteration Settings", "piv_tar_lang_data"))
    tarf = utilities.safe_string(
        config.get("Iteration Settings", "tar_lang_data"))
    train_split = config.getfloat("Iteration Settings", "train_split")
    test_split = config.getfloat("Iteration Settings", "test_split")
    ncpus = config.getint("Environment Settings", "ncpus")
    ngram = config.getint("Environment Settings", "ngram")
    work_dir1 = utilities.safe_string(
        config.get("Iteration Settings", "working_dir_first_leg"))
    work_dir2 = utilities.safe_string(
        config.get("Iteration Settings", "working_dir_second_leg"))

    # First leg: src -> pivot; second leg: pivot -> target.
    pair1, pair2 = FileDataPair(srcf, piv1f), FileDataPair(piv2f, tarf)
    raw_files = pair1.get_raw_filenames() + pair2.get_raw_filenames()
    pair1_tokenized_src, pair1_tokenized_tar = pair1.get_tokenized_filenames()
    pair2_tokenized_src, pair2_tokenized_tar = pair2.get_tokenized_filenames()
    pair1_cleansed_src, pair1_cleansed_tar = pair1.get_cleansed_filenames()
    pair2_cleansed_src, pair2_cleansed_tar = pair2.get_cleansed_filenames()

    parser = Parser(path_to_moses, mem_limit, max_len, min_len, False)
    parser.tokenize_files(raw_files)
    parser.cleanse(pair1_tokenized_src, pair1_tokenized_tar)
    parser.cleanse(pair2_tokenized_src, pair2_tokenized_tar)
    parser.split_train_tune_test(pair1_cleansed_src, pair1_cleansed_tar,
                                 pair2_cleansed_src, pair2_cleansed_tar,
                                 train_split, test_split)

    # FIX: the test filenames must be fetched *before* the match step —
    # they were previously referenced here while still undefined
    # (NameError) — and the first leg's target test file was mistakenly
    # passed as pair2_test_tar in the second argument position.
    pair1_test_src, pair1_test_tar = pair1.get_test_filenames()
    pair2_test_src, pair2_test_tar = pair2.get_test_filenames()
    # Keep the two legs' test sets consistent with each other.
    parser.match(pair1_test_src, pair1_test_tar,
                 pair2_test_src, pair2_test_tar)

    # Language models are built on each leg's target-side training data.
    pair1_target_train_filename = pair1.get_target_train_filename()
    pair2_target_train_filename = pair2.get_target_train_filename()
    pair1_train_src, pair1_train_tar = pair1.get_train_filenames()
    pair2_train_src, pair2_train_tar = pair2.get_train_filenames()
    trainer = Train(path_to_moses, ncpus, ngram, False)
    trainer.build_language_models(pair1_target_train_filename)
    trainer.build_language_models(pair2_target_train_filename)
    trainer.train(pair1_train_src, pair1_train_tar, work_dir1)
    trainer.train(pair2_train_src, pair2_train_tar, work_dir2)

    pair1_tune_src, pair1_tune_tar = pair1.get_tune_filenames()
    pair2_tune_src, pair2_tune_tar = pair2.get_tune_filenames()
    tuner = Tune(path_to_moses, ncpus, False)
    tuner.tune(pair1_tune_src, pair1_tune_tar, work_dir1)
    tuner.tune(pair2_tune_src, pair2_tune_tar, work_dir2)

    # The matched ("eval") files are what the pivot-quality test consumes.
    pair1_eval_file = pair1.get_eval_filename()
    pair2_eval_file = pair2.get_eval_filename()
    tester = Test(path_to_moses, False)
    tester.test_pivoting_quality(pair1_eval_file, work_dir1,
                                 pair2_eval_file, work_dir2)
def main():
    """Build target-side language models, then train both translation legs."""
    cfg = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))
    ngram_order = cfg.getint("Environment Settings", "ngram")
    cpu_count = cfg.getint("Environment Settings", "ncpus")

    trainer = Train(moses_path, cpu_count, ngram_order)
    # One LM per leg, built on the target-side training data.
    for lm_corpus in ("data/train/europarl-v7.es-en.en.tok.cleansed.train",
                      "data/train/europarl-v7.fr-en.fr.tok.cleansed.train"):
        trainer.build_language_models(lm_corpus)
    trainer.train("data/train/europarl-v7.es-en.es.tok.cleansed.train",
                  "data/train/europarl-v7.es-en.en.tok.cleansed.train",
                  "es-en.working")
    trainer.train("data/train/europarl-v7.fr-en.en.tok.cleansed.train",
                  "data/train/europarl-v7.fr-en.fr.tok.cleansed.train",
                  "en-fr.working")
def main():
    """Evaluate each leg's translations and the es→en→fr pivot chain."""
    cfg = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))
    tester = Test(moses_path)

    direct_legs = (
        ("data/test/europarl-v7.es-en.es.tok.cleansed.test",
         "data/test/europarl-v7.es-en.en.tok.cleansed.test",
         "es-en.working"),
        ("data/test/europarl-v7.fr-en.en.tok.cleansed.test",
         "data/test/europarl-v7.fr-en.fr.tok.cleansed.test",
         "en-fr.working"),
    )
    for src_file, ref_file, workdir in direct_legs:
        tester.test_translation_quality(src_file, ref_file, workdir)

    # Pivot evaluation runs on the matched test files.
    tester.test_pivoting_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test.matched",
        "es-en.working",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test.matched",
        "en-fr.working")
def main():
    """Launch the interactive translation server on the es→en model."""
    cfg = utilities.config_file_reader()
    decoder = utilities.safe_string(
        cfg.get("Environment Settings", "path_to_moses_decoder"))
    srv = Server(decoder)
    srv.translate_interactive("es-en.working")
def handler(self, conn, a): print("* {}:{} connected...".format(a[0], a[1])) # Send a message asking client to identify client UUID conn.sendall(str.encode('auth-request')) player_id = None while True: try: data = conn.recv(1024) message = data.decode('UTF-8') message = message.replace('\n', '') if not data: print("* {}:{} disconnected...".format(a[0], a[1])) # Remove from connections self.connections.remove(conn) # Remove from players self.players.pop(player_id, None) self.disconnections.append(player_id) # Prepare to close connection conn.shutdown(socket.SHUT_RDWR) break if len(message) <= 1: continue # If user has not identified if not player_id: # Strip any illegal input player_id = safe_string(message) if len(player_id) < 1: continue if player_id not in self.players: # Set coordinate coordinate = "0.0,0.0,0.0,0.0,180.0,0.0" self.players[player_id] = {} self.players[player_id]['location'] = coordinate conn.sendall( str.encode("auth-success,{},{}".format( player_id, coordinate))) # Send all locations of current players else: coordinate = self.players.get( player_id, {}).get('location') or "0.0,0.0,0.0,0.0,180.0,0.0" conn.sendall( str.encode("auth-success,{},{}".format( player_id, coordinate))) else: messages = message.split(';') for msg in messages: arr = msg.split(',') # Handle chat message if arr[0] == 'chat': self.chat.append(player_id[:5] + ": " + arr[1] + ";") # Handle position update if arr[0] == 'position': # Position rx = float(arr[1]) ry = float(arr[2]) rz = float(arr[3]) # Rotation px = float(arr[5]) py = float(arr[6]) pz = float(arr[7]) self.players[player_id][ 'location'] = '{},{},{},{},{},{};'.format( rx, ry, rz, px, py, pz) conn.sendall(str.encode("update-success")) except socket.error as e: # print("Error! {}".format(e)) break conn.close()