def _makeToyEnDeData(self, with_alignments=False):
    """Create a tiny English-German parallel dataset in the test temp directory.

    Three source sentences and their three target sentences are written with
    ``test_util.make_data_file``, and a vocabulary is derived from each file
    with ``test_util.make_vocab_from_file``.

    Args:
      with_alignments: If true, an alignment file is also created and its
        handle is stored under ``data_config["train_alignments"]``.

    Returns:
      A tuple ``(features_file, labels_file, data_config)``. The first two are
      whatever ``test_util.make_data_file`` returns for the source and target
      files (presumably their paths; confirm against ``test_util``), and
      ``data_config`` is a dict with the ``source_vocabulary`` and
      ``target_vocabulary`` entries.
    """

    def _in_temp_dir(filename):
        # The temp directory is looked up again for every file.
        return os.path.join(self.get_temp_dir(), filename)

    english_sentences = [
        "Parliament Does Not Support Amendment Freeing Tymoshenko",
        (
            "Today , the Ukraine parliament dismissed , within the Code of Criminal Procedure "
            "amendment , the motion to revoke an article based on which the opposition leader , "
            "Yulia Tymoshenko , was sentenced ."
        ),
        (
            "The amendment that would lead to freeing the imprisoned former Prime Minister was "
            "revoked during second reading of the proposal for mitigation of sentences for "
            "economic offences ."
        ),
    ]
    german_sentences = [
        "Keine befreiende Novelle für Tymoshenko durch das Parlament",
        (
            "Das ukrainische Parlament verweigerte heute den Antrag , im Rahmen einer Novelle "
            "des Strafgesetzbuches denjenigen Paragrafen abzuschaffen , auf dessen Grundlage die "
            "Oppositionsführerin Yulia Timoshenko verurteilt worden war ."
        ),
        (
            "Die Neuregelung , die den Weg zur Befreiung der inhaftierten Expremierministerin hätte "
            "ebnen können , lehnten die Abgeordneten bei der zweiten Lesung des Antrags auf Milderung "
            "der Strafen für wirtschaftliche Delikte ab ."
        ),
    ]

    # Data files first, then the vocabularies that are built from them.
    features_file = test_util.make_data_file(
        _in_temp_dir("src.txt"), english_sentences
    )
    labels_file = test_util.make_data_file(
        _in_temp_dir("tgt.txt"), german_sentences
    )
    data_config = {
        "source_vocabulary": test_util.make_vocab_from_file(
            _in_temp_dir("src_vocab.txt"), features_file
        ),
        "target_vocabulary": test_util.make_vocab_from_file(
            _in_temp_dir("tgt_vocab.txt"), labels_file
        ),
    }

    if with_alignments:
        # Dummy and incomplete alignments, one line per sentence pair.
        dummy_alignments = [
            "0-0 1-0 2-2 3-4 4-4 5-6",
            "0-1 1-1 1-3 2-3 4-4",
            "0-0 1-0 2-2 3-4 4-4 5-6",
        ]
        data_config["train_alignments"] = test_util.make_data_file(
            _in_temp_dir("aligne.txt"), dummy_alignments
        )

    return features_file, labels_file, data_config
def _makeToyTaggerData(self):
    """Create a tiny sequence-tagging dataset in the test temp directory.

    Two tokenized sentences and their per-token tag lines are written with
    ``test_util.make_data_file``. The source vocabulary is derived from the
    sentence file, while the target vocabulary is a hand-written list of tags.

    Returns:
      A tuple ``(features_file, labels_file, data_config)``. The first two are
      whatever ``test_util.make_data_file`` returns for the sentence and tag
      files (presumably their paths; confirm against ``test_util``), and
      ``data_config`` is a dict with the ``source_vocabulary`` and
      ``target_vocabulary`` entries.
    """

    def _in_temp_dir(filename):
        # The temp directory is looked up again for every file.
        return os.path.join(self.get_temp_dir(), filename)

    sentences = [
        "M . Smith went to Washington .",
        "I live in New Zealand .",
    ]
    tag_lines = [
        "B-PER I-PER E-PER O O S-LOC O",
        "O O O B-LOC E-LOC O",
    ]
    tag_vocabulary = [
        "O",
        "B-LOC",
        "I-LOC",
        "E-LOC",
        "S-LOC",
        "B-PER",
        "I-PER",
        "E-PER",
        "S-PER",
    ]

    features_file = test_util.make_data_file(_in_temp_dir("src.txt"), sentences)
    labels_file = test_util.make_data_file(_in_temp_dir("labels.txt"), tag_lines)

    data_config = {
        "source_vocabulary": test_util.make_vocab_from_file(
            _in_temp_dir("src_vocab.txt"), features_file
        ),
        # The tag vocabulary is written out directly instead of being derived
        # from the labels file.
        "target_vocabulary": test_util.make_data_file(
            _in_temp_dir("labels_vocab.txt"), tag_vocabulary
        ),
    }
    return features_file, labels_file, data_config
def _makeToyClassifierData(self):
    """Create a tiny text-classification dataset in the test temp directory.

    Three tokenized sentences and one class label per sentence are written
    with ``test_util.make_data_file``. The source vocabulary is derived from
    the sentence file, while the target vocabulary is a hand-written list of
    the class names.

    Returns:
      A tuple ``(features_file, labels_file, data_config)``. The first two are
      whatever ``test_util.make_data_file`` returns for the sentence and label
      files (presumably their paths; confirm against ``test_util``), and
      ``data_config`` is a dict with the ``source_vocabulary`` and
      ``target_vocabulary`` entries.
    """

    def _in_temp_dir(filename):
        # The temp directory is looked up again for every file.
        return os.path.join(self.get_temp_dir(), filename)

    reviews = [
        "This product was not good at all , it broke on the first use !",
        "Perfect , it does everything I need .",
        "How do I change the battery ?",
    ]
    review_labels = ["negative", "positive", "neutral"]
    # Kept as a separate list object from the per-example labels above.
    label_vocabulary = ["negative", "positive", "neutral"]

    features_file = test_util.make_data_file(_in_temp_dir("src.txt"), reviews)
    labels_file = test_util.make_data_file(
        _in_temp_dir("labels.txt"), review_labels
    )

    data_config = {
        "source_vocabulary": test_util.make_vocab_from_file(
            _in_temp_dir("src_vocab.txt"), features_file
        ),
        "target_vocabulary": test_util.make_data_file(
            _in_temp_dir("labels_vocab.txt"), label_vocabulary
        ),
    }
    return features_file, labels_file, data_config