def test_spanish_tune(self):
    """Compare temp_files/tune.en-es.es with the expected Spanish tune lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "s16-l\ns17-l\ns18-l\ns19-l\ns4-m\ns5-m\ns6-m\ns7-m\ns4-m\ns5-m"

    create_hybrid_corpora(self.cconf)

    tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.es")
    with open(tune_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_tune(self):
    """Compare temp_files/tune.en-es.en with the expected English tune lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "e16-l\ne17-l\ne18-l\ne19-l\ne4-m\ne5-m\ne6-m\ne7-m\ne4-m\ne5-m"

    create_hybrid_corpora(self.cconf)

    tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.en")
    with open(tune_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_train(self):
    """Compare temp_files/train.en-es.es with the expected Spanish train lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "s1-l\ns2-l\ns3-l\ns4-l\ns5-l\ns6-l\ns7-l\ns8-l\ns9-l\ns10-l\ns11-l\ns12-l\ns13-l\ns14-l\ns15-l\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_train(self):
    """Compare temp_files/train.en-es.en with the expected English train lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "e1-l\ne2-l\ne3-l\ne4-l\ne5-l\ne6-l\ne7-l\ne8-l\ne9-l\ne10-l\ne11-l\ne12-l\ne13-l\ne14-l\ne15-l\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def create_corpora(args):
    """Pick a corpora configuration and run create_hybrid_corpora on it.

    The configuration is taken from the fetched config
    (conf.system.files.data.corpora) when args.t_corpora_config is None;
    otherwise it is built with CorporaConfig from the YAML document that
    args.t_corpora_config points at.

    Logs an error and returns without creating anything when the given
    configuration file does not exist, or when the configuration's
    container_path already exists on disk.
    """
    conf = fetch_config(args)

    if args.t_corpora_config is None:
        cconf = conf.system.files.data.corpora
    else:
        # An explicit path was supplied: it has to be an existing file.
        if not os.path.isfile(args.t_corpora_config):
            logger.error(args.t_corpora_config + " doesn't exist")
            return
        cconf = CorporaConfig(ingest_yaml_doc(args.t_corpora_config))

    # Refuse to touch a container that is already present.
    if os.path.exists(cconf.container_path):
        logger.error(cconf.container_path + " already exists. Please delete it or change the container and try again")
        return

    create_hybrid_corpora(cconf)
def create_corpora(args):
    """Pick a corpora configuration and run create_hybrid_corpora on it.

    The configuration is taken from the fetched config
    (conf.system.files.data.corpora) when args.t_corpora_config is None;
    otherwise CorporaConfig is constructed directly from the
    args.t_corpora_config path.

    Logs an error and returns without creating anything when the given
    configuration file does not exist, or when the configuration's
    container_path already exists on disk.
    """
    conf = fetch_config(args)

    if args.t_corpora_config is None:
        cconf = conf.system.files.data.corpora
    elif not os.path.isfile(args.t_corpora_config):
        logger.error(args.t_corpora_config + " doesn't exist")
        return
    else:
        cconf = CorporaConfig(args.t_corpora_config)

    # Only build the corpora when the container is not already there.
    if os.path.exists(cconf.container_path):
        logger.error(cconf.container_path + " already exists. Please remove and try again")
    else:
        create_hybrid_corpora(cconf)
def test_english_test(self):
    """Compare temp_files/test.en-es.en with the expected English test lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "e20-l\ne20-l\ne8-m\ne9-m\ne10-m"

    create_hybrid_corpora(self.cconf)

    test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.en")
    with open(test_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_train(self):
    """Compare temp_files/train.en-es.en with the expected English train lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "hello\ne1-m\ne2-m\ne3-m"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_train(self):
    """Compare temp_files/train.en-es.es with the expected Spanish train lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "s1-l\ns2-l\ns3-l\ns4-l\ns5-l\ns6-l\ns7-l\ns8-l\ns9-l\ns10-l\ns11-l\ns12-l\ns13-l\ns14-l\ns15-l\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m\ns1-m\ns2-m\ns3-m"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_test(self):
    """Check that temp_files/test.en-es.es is empty once stripped.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents to the empty string.
    """
    expected = ""

    create_hybrid_corpora(self.cconf)

    test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.es")
    with open(test_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_train(self):
    """Check that temp_files/train.en-es.en holds the single line "hello".

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "hello"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_tune(self):
    """Compare temp_files/tune.en-es.es with the expected Spanish tune lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "s16-l\ns17-l\ns18-l\ns19-l\ns4-m\ns5-m\ns6-m\ns7-m\ns4-m\ns5-m"

    create_hybrid_corpora(self.cconf)

    tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.es")
    with open(tune_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_tune(self):
    """Compare temp_files/tune.en-es.en with the expected English tune lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "e16-l\ne17-l\ne18-l\ne19-l\ne4-m\ne5-m\ne6-m\ne7-m\ne4-m\ne5-m"

    create_hybrid_corpora(self.cconf)

    tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.en")
    with open(tune_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_test(self):
    """Compare temp_files/test.en-es.es with the expected Spanish test lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "s20-l\ns20-l\ns8-m\ns9-m\ns10-m"

    create_hybrid_corpora(self.cconf)

    test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.es")
    with open(test_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_spanish_train(self):
    """Check that temp_files/train.en-es.es holds the single line "hola".

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "hola"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)
def test_english_train(self):
    """Compare temp_files/train.en-es.en with the expected English train lines.

    Runs create_hybrid_corpora on self.cconf first, then reads the file
    under TEST_PATH and compares its stripped contents.
    """
    expected = "e1-l\ne2-l\ne3-l\ne4-l\ne5-l\ne6-l\ne7-l\ne8-l\ne9-l\ne10-l\ne11-l\ne12-l\ne13-l\ne14-l\ne15-l\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m"

    create_hybrid_corpora(self.cconf)

    train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
    with open(train_path) as handle:
        contents = handle.read().strip()

    self.assertEqual(contents, expected)