def plot_confusion_matrix(feature_type, preprocessing_type, results_filename='results.pkl', normalize=True):
    """Plot the test-set confusion matrix stored in a pickled results file.

    Parameters
    ----------
    feature_type
        Key of the feature entry (under ``preprocessing_type``) whose
        ``'test_pred'`` predictions are plotted.
    preprocessing_type
        Top-level key of the results mapping; its ``'y_test'`` entry supplies
        the true labels.
    results_filename : str
        File name, relative to ``RESULTS_DIR``, of the pickled results.
    normalize : bool
        Forwarded unchanged to ``_plot_confusion_matrix``.
    """
    results_path = os.path.join(RESULTS_DIR, results_filename)
    # NOTE: unpickling can execute arbitrary code; only load results files
    # produced by this project.
    with open(results_path, 'rb') as handle:
        results = pickle.load(handle)

    # Stored labels are LANGUAGES values; translate them back to the keys.
    value_to_key = {value: key for key, value in LANGUAGES.items()}
    section = results[preprocessing_type]
    y_true = section['y_test'].map(value_to_key)
    y_pred = section[feature_type]['test_pred'].map(value_to_key)

    _plot_confusion_matrix(y_true, y_pred, LANGUAGES.keys(), normalize=normalize)
def __init__(self, rid, rev='HEAD'):
    """Open the repository stored under ``settings.REPO_DIR/<rid>``.

    Sets ``id``, ``rev``, ``owner``, ``files``, ``mainfile``, ``language``,
    ``commits``, ``has_history``, ``has_releases`` and ``revtag``.

    :param rid: repository id; its string form is the directory name.
    :param rev: revision to resolve. An unresolvable ``'HEAD'`` is
        tolerated (``self.rev`` becomes ``None``); any other unresolvable
        revision re-raises ``BadObject``.
    """
    Repo.__init__(self, os.path.join(settings.REPO_DIR, str(rid)))

    # Store the repo id.
    self.id = rid

    # Resolve the requested revision.
    try:
        self.rev = self.commit(rev)
    except BadObject:
        if rev != 'HEAD':
            raise
        self.rev = None

    # Determine the owner.
    if self.rev:
        cr = self.config_reader('repository')
        self.owner = {
            'email': cr.get('user', 'email'),
            'name': cr.get('user', 'name'),
        }
    else:
        self.owner = None

    # Determine what the main file is. The 'title' entry is never the main
    # file; it may be absent, or be the only entry, and neither case may
    # raise (list.remove raises ValueError, [0] raises IndexError).
    self.files = [entry[0] for entry in self.index.entries.keys()]
    if 'title' in self.files:
        self.files.remove('title')
    self.mainfile = self.files[0] if self.files else None

    # Determine the language of the main file: the first LANGUAGES value
    # whose 'extensions' contains the text after the last dot.
    self.language = None
    if self.mainfile:
        ext = '.' + self.mainfile.rsplit('.', 1)[-1]
        for lang in LANGUAGES.values():
            if ext in lang['extensions']:
                self.language = lang
                break

    # Get the commits. History always starts at HEAD, independent of `rev`.
    if self.rev:
        head = self.commit('HEAD')
        self.commits = [head] + list(head.iter_parents())
    else:
        self.commits = []
    self.has_history = len(self.commits) > 1

    # Get the tags, and the tag (if any) pointing at the resolved revision.
    self.has_releases = len(self.tags) > 0
    self.revtag = None
    if self.rev:
        for tag in self.tags:
            if tag.commit == self.rev:
                self.revtag = tag
                break
def __init__(self, rid, rev='HEAD'):
    """Open the repository stored under ``settings.REPO_DIR/<rid>``.

    Sets ``id``, ``rev``, ``owner``, ``files``, ``mainfile``, ``language``,
    ``commits``, ``has_history``, ``has_releases`` and ``revtag``.

    :param rid: repository id; its string form is the directory name.
    :param rev: revision to resolve. An unresolvable ``'HEAD'`` is
        tolerated (``self.rev`` becomes ``None``); any other unresolvable
        revision re-raises ``BadObject``.
    """
    Repo.__init__(self, os.path.join(settings.REPO_DIR, str(rid)))

    # Store the repo id.
    self.id = rid

    # Resolve the requested revision.
    try:
        self.rev = self.commit(rev)
    except BadObject:
        if rev != 'HEAD':
            raise
        self.rev = None

    # Determine the owner.
    if self.rev:
        cr = self.config_reader('repository')
        self.owner = {
            'email': cr.get('user', 'email'),
            'name': cr.get('user', 'name'),
        }
    else:
        self.owner = None

    # Determine what the main file is. The 'title' entry is never the main
    # file; it may be absent, or be the only entry, and neither case may
    # raise (list.remove raises ValueError, [0] raises IndexError).
    self.files = [entry[0] for entry in self.index.entries.keys()]
    if 'title' in self.files:
        self.files.remove('title')
    self.mainfile = self.files[0] if self.files else None

    # Determine the language of the main file: the first LANGUAGES value
    # whose 'extensions' contains the text after the last dot.
    self.language = None
    if self.mainfile:
        ext = '.' + self.mainfile.rsplit('.', 1)[-1]
        for lang in LANGUAGES.values():
            if ext in lang['extensions']:
                self.language = lang
                break

    # Get the commits. History always starts at HEAD, independent of `rev`.
    if self.rev:
        head = self.commit('HEAD')
        self.commits = [head] + list(head.iter_parents())
    else:
        self.commits = []
    self.has_history = len(self.commits) > 1

    # Get the tags, and the tag (if any) pointing at the resolved revision.
    self.has_releases = len(self.tags) > 0
    self.revtag = None
    if self.rev:
        for tag in self.tags:
            if tag.commit == self.rev:
                self.revtag = tag
                break
def download(language):
    """Download the UniMorph dataset for `language`.

    Parameters
    ----------
    language : str
        The ISO 639-3 code for the language

    Returns
    -------
    int
        Exit status of the git command (zero if successful, non-zero if not)
    """
    # Imported here so the block does not depend on the file's import header.
    import subprocess

    url = f"https://github.com/unimorph/{language}.git"
    destination = os.path.join(UNIMORPH_DIR, language)
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    try:
        completed = subprocess.run(
            ["git", "clone", "--quiet", url, destination], check=False
        )
    except OSError:
        # git could not be started at all; keep the "non-zero means failure"
        # contract instead of raising (127 mirrors a shell's "not found").
        return 127
    return completed.returncode


if __name__ == "__main__":
    # Reset the target directory, then try every configured language.
    refresh(UNIMORPH_DIR)
    for name, code in LANGUAGES.items():
        status = download(code)
        if status == 0:
            print(f"Downloaded UniMorph data for {name}")
        else:
            # Any clone failure is reported as missing data.
            print(f"No UniMorph data for {name}")
# NOTE(review): this chunk opens part-way through a function body. The
# enclosing ``def`` and the loop that the first ``continue`` belongs to are not
# visible here, so the statements are shown without their original nesting.
# Skip this example when ``length`` exceeds 512 (presumably the model's input
# limit; confirm against where ``length`` is computed).
if length > 512:
    continue
# Produce one predictions file per masked variant of the example.
for mask in ["masked", "other_masked"]:
    try:
        predictions = bert.predict(example[mask], fold_case)
    except ValueError:  # MASK not in sentence
        continue
    # drop words we don't have features for
    predictions = predictions[predictions.index.isin(features_vocab)]
    # Output is "<uid>.csv", prefixed with "reverse-" for the "other_masked"
    # variant, written under PROBABILITIES_DIR/<code>/.
    file_name = f'{example["uid"]}.csv'
    if mask == "other_masked":
        file_name = "reverse-" + file_name
    file_name = os.path.join(PROBABILITIES_DIR, code, file_name)
    predictions.to_csv(file_name)


if __name__ == "__main__":
    # get probabilities for languages with fewer cloze examples first
    already_done = ["bre", "hun", "hye", "tam", "tel", "tur"]
    # Map each remaining language to the row count of its cloze CSV.
    ORDER = {
        language: len(pd.read_csv(os.path.join(CLOZE_DIR, f"{code}.csv")))
        for language, code in LANGUAGES.items()
        if code not in already_done
    }
    # NOTE(review): this assignment replaces the mapping computed just above,
    # so only these two languages are processed and the CSV reads are wasted.
    # Looks like a leftover manual override; confirm before relying on it.
    ORDER = {"Czech": 0, "German": 1}
    for language in sorted(ORDER, key=ORDER.get):
        try:
            # NOTE(review): ``result`` is never used after this assignment.
            result = run(language)
            print(f"Finished with {language}")
        # NOTE(review): the bare ``except`` also catches KeyboardInterrupt and
        # SystemExit, and the message below hides what actually failed.
        except:  # noqa
            print(f"Error with {language}")
def __init__(self):
    """Build and show the translator window.

    Creates the translator and history helpers from files next to the
    module, sets up the generated UI, wires the input box signals, creates
    the actions, tray icon and menu, installs a completer when
    ``dictionary.txt`` exists, and fills both language combo boxes from
    ``LANGUAGES``.
    """
    super().__init__()
    import module_locator

    base_dir = module_locator.module_path()
    self.module_path = base_dir
    self.tr = Translator(os.path.join(base_dir, 'config.ini'))
    self.history = History(os.path.join(base_dir, 'history.txt'))
    self.threads = [None] * 5

    self.setupUi(self)
    self.center()

    # Input box: translate on paste, walk the history with up/down.
    input_edit = self.inputEdit
    input_edit.pasted.connect(self.translate)
    input_edit.up_pressed.connect(self.navigate_history_forward)
    input_edit.down_pressed.connect(self.navigate_history_backward)
    input_edit.installEventFilter(self)

    # Actions.
    self.actionExit = QAction(
        'Exit', self, shortcut='Ctrl+Shift+Q', triggered=self.close)
    self.actionHideOrShow = QAction(
        'Hide', self, shortcut='Ctrl+Q', triggered=self.hide_or_show)
    self.actionTranslate = QAction(
        'Translate', self, triggered=self.translate)
    # 16777220 is 0x01000004, the value Qt documents for Key_Return.
    translate_shortcuts = [16777220, Qt.CTRL + Qt.Key_Space, Qt.Key_Enter]
    self.actionTranslate.setShortcuts(translate_shortcuts)
    self.translateButton.pressed.connect(self.translate)
    self.actionSettings = QAction('Settings')
    focus_input_action = QAction(
        'Focus input', self, shortcut='Ctrl+L',
        triggered=self.input_edit_set_focus)
    self.addAction(focus_input_action)

    # System tray icon with its context menu.
    tray_icon = QIcon(os.path.join(base_dir, 'icon.png'))
    self.tray = QSystemTrayIcon(tray_icon, self)
    tray_menu = QMenu()
    for tray_action in (self.actionHideOrShow, self.actionSettings,
                        self.actionExit):
        tray_menu.addAction(tray_action)
    self.tray.setContextMenu(tray_menu)
    self.tray.show()

    # Main menu.
    for menu_action in (self.actionTranslate, self.actionHideOrShow,
                        self.actionExit, self.actionSettings):
        self.menuTranslator.addAction(menu_action)
    self.menuBar.addAction(self.menuTranslator.menuAction())
    self.show()

    # Optional completion list: one stripped entry per line of the file.
    dictionary_path = os.path.join(base_dir, 'dictionary.txt')
    if os.path.exists(dictionary_path):
        with open(dictionary_path, "r", encoding='utf-8', newline='') as f:
            entries = [line.strip() for line in f]
        completer = QCompleter(entries, self.inputEdit)
        completer.setCaseSensitivity(Qt.CaseInsensitive)
        self.inputEdit.setCompleter(completer)

    # Swap button.
    swap_icon = QIcon(os.path.join(base_dir, 'arrowupdown.png'))
    self.swapLangsButton.setIcon(swap_icon)
    self.swapLangsButton.pressed.connect(self.swap_languages)

    # Both combo boxes list every language, keeping the code as user data.
    for code, name in LANGUAGES.items():
        self.fromComboBox.addItem(name, userData=code)
        self.toComboBox.addItem(name, userData=code)