Example #1
0
def plot_confusion_matrix(feature_type, preprocessing_type, results_filename='results.pkl', normalize=True):
    """Plot the test-set confusion matrix for one stored experiment.

    Parameters
    ----------
    feature_type
        Key of the feature set inside the chosen preprocessing entry.
    preprocessing_type
        Key of the preprocessing entry inside the pickled results.
    results_filename
        Name of the pickle file, looked up inside ``RESULTS_DIR``.
    normalize
        Forwarded unchanged to ``_plot_confusion_matrix``.
    """
    # LANGUAGES is inverted (value -> key) so stored labels can be mapped
    # back to the LANGUAGES keys that are used as class names below.
    label_of = {value: key for key, value in LANGUAGES.items()}

    results_path = os.path.join(RESULTS_DIR, results_filename)
    with open(results_path, 'rb') as handle:
        results = pickle.load(handle)

    experiment = results[preprocessing_type]
    y_true = experiment['y_test'].map(label_of)
    y_pred = experiment[feature_type]['test_pred'].map(label_of)
    # title = f"features: {feature_type}   ;   preprocessing: {preprocessing_type}"
    _plot_confusion_matrix(y_true, y_pred, LANGUAGES.keys(), normalize=normalize)
Example #2
0
    def __init__(self, rid, rev='HEAD'):
        """Open the repository stored under ``settings.REPO_DIR/<rid>``.

        Besides initialising the underlying ``Repo``, this caches a number of
        derived attributes: ``id``, ``rev``, ``owner``, ``files``,
        ``mainfile``, ``language``, ``commits``, ``has_history``,
        ``has_releases`` and ``revtag``.

        :param rid: repository id; its string form is the directory name.
        :param rev: revision to resolve; defaults to ``'HEAD'``.
        :raises BadObject: if ``rev`` cannot be resolved and is not ``'HEAD'``.
        """
        Repo.__init__(self, os.path.join(settings.REPO_DIR, str(rid)))

        # Store the repo id.
        self.id = rid
        try:
            self.rev = self.commit(rev)
        except BadObject:
            # An unresolvable HEAD is tolerated (rev becomes None); any other
            # unresolvable revision is an error for the caller.
            if rev == 'HEAD':
                self.rev = None
            else:
                raise

        # Determine the owner.
        if self.rev:
            cr = self.config_reader('repository')
            self.owner = {
                'email': cr.get('user', 'email'),
                'name': cr.get('user', 'name'),
            }
        else:
            self.owner = None

        # Determine what the main file is.
        self.mainfile = None
        self.files = [entry[0] for entry in self.index.entries.keys()]
        # The 'title' entry is never exposed as a file.  Only remove it when
        # it is actually present, and re-check for emptiness afterwards:
        # an unconditional remove() raised ValueError for an index without
        # 'title', and files[0] raised IndexError when 'title' was the only
        # entry.
        if 'title' in self.files:
            self.files.remove('title')
        if self.files:
            self.mainfile = self.files[0]

        # Determine the language of the main file.
        self.language = None
        if self.mainfile:
            # Text after the last dot, with the dot put back in front.
            ext = '.' + self.mainfile.rsplit('.')[-1]
            for lang in LANGUAGES.values():
                if ext in lang['extensions']:
                    self.language = lang
                    break

        # Get the commits.
        if self.rev:
            # The list always starts at HEAD, followed by all of its parents,
            # regardless of which revision was requested.
            head = self.commit('HEAD')
            self.commits = [head]
            self.commits += list(head.iter_parents())
        else:
            self.commits = []
        self.has_history = len(self.commits) > 1

        # Get the tags.
        self.has_releases = len(self.tags) > 0
        self.revtag = None
        if self.rev:
            # First tag pointing at the resolved revision, if any.
            for tag in self.tags:
                if tag.commit == self.rev:
                    self.revtag = tag
                    break
Example #3
0
    def __init__(self, rid, rev='HEAD'):
        """Open the repository stored under ``settings.REPO_DIR/<rid>``.

        Besides initialising the underlying ``Repo``, this caches a number of
        derived attributes: ``id``, ``rev``, ``owner``, ``files``,
        ``mainfile``, ``language``, ``commits``, ``has_history``,
        ``has_releases`` and ``revtag``.

        :param rid: repository id; its string form is the directory name.
        :param rev: revision to resolve; defaults to ``'HEAD'``.
        :raises BadObject: if ``rev`` cannot be resolved and is not ``'HEAD'``.
        """
        Repo.__init__(self, os.path.join(settings.REPO_DIR, str(rid)))

        # Store the repo id.
        self.id = rid
        try:
            self.rev = self.commit(rev)
        except BadObject:
            # An unresolvable HEAD is tolerated (rev becomes None); any other
            # unresolvable revision is an error for the caller.
            if rev == 'HEAD':
                self.rev = None
            else:
                raise

        # Determine the owner.
        if self.rev:
            cr = self.config_reader('repository')
            self.owner = {
                'email': cr.get('user', 'email'),
                'name': cr.get('user', 'name'),
            }
        else:
            self.owner = None

        # Determine what the main file is.
        self.mainfile = None
        self.files = [entry[0] for entry in self.index.entries.keys()]
        # The 'title' entry is never exposed as a file.  Guard both steps:
        # an unconditional remove() raised ValueError for an index without
        # 'title', and files[0] raised IndexError when 'title' was the only
        # entry.
        if 'title' in self.files:
            self.files.remove('title')
        if self.files:
            self.mainfile = self.files[0]

        # Determine the language of the main file.
        self.language = None
        if self.mainfile:
            # Text after the last dot, with the dot put back in front.
            ext = '.' + self.mainfile.rsplit('.')[-1]
            for lang in LANGUAGES.values():
                if ext in lang['extensions']:
                    self.language = lang
                    break

        # Get the commits.
        if self.rev:
            # The list always starts at HEAD, followed by all of its parents,
            # regardless of which revision was requested.
            head = self.commit('HEAD')
            self.commits = [head]
            self.commits += list(head.iter_parents())
        else:
            self.commits = []
        self.has_history = len(self.commits) > 1

        # Get the tags.
        self.has_releases = len(self.tags) > 0
        self.revtag = None
        if self.rev:
            # First tag pointing at the resolved revision, if any.
            for tag in self.tags:
                if tag.commit == self.rev:
                    self.revtag = tag
                    break
Example #4
0

def download(language):
    """Download the UniMorph dataset for `language`.

    The repository ``https://github.com/unimorph/<language>.git`` is cloned
    into ``UNIMORPH_DIR/<language>``.  git is invoked directly with an
    argument list (no shell), so directory names containing spaces or shell
    metacharacters are passed through verbatim instead of being re-parsed.

    Parameters
    ----------
    language : str
        The ISO 639-3 code for the language

    Returns
    -------
    int
        Exit status of the git command (zero if successful, non-zero if not).
        If the git executable cannot be started at all, 127 is returned.

    """
    # Function-scope import so the block does not depend on the module's
    # import section already providing subprocess.
    import subprocess

    url = f"https://github.com/unimorph/{language}.git"
    destination = os.path.join(UNIMORPH_DIR, language)
    try:
        completed = subprocess.run(
            ["git", "clone", "--quiet", url, destination],
            check=False,  # failure is reported through the return value
        )
    except OSError:
        # git missing or not executable: keep the "non-zero means failure"
        # contract instead of raising (127 is the shell's "not found" code).
        return 127
    return completed.returncode


if __name__ == "__main__":
    # Prepare the target directory, then try to fetch every known language.
    refresh(UNIMORPH_DIR)
    for name, code in LANGUAGES.items():
        # download() returns the git exit status; non-zero means no data.
        if download(code) != 0:
            print(f"No UniMorph data for {name}")
            continue
        print(f"Downloaded UniMorph data for {name}")
Example #5
0
        if length > 512:
            continue
        for mask in ["masked", "other_masked"]:
            try:
                predictions = bert.predict(example[mask], fold_case)
            except ValueError:  # MASK not in sentence
                continue
            # drop words we don't have features for
            predictions = predictions[predictions.index.isin(features_vocab)]
            file_name = f'{example["uid"]}.csv'
            if mask == "other_masked":
                file_name = "reverse-" + file_name
            file_name = os.path.join(PROBABILITIES_DIR, code, file_name)
            predictions.to_csv(file_name)


if __name__ == "__main__":
    # Script-scope import so the block does not depend on the module's
    # import section already providing traceback.
    import traceback

    # get probabilities for languages with fewer cloze examples first
    already_done = ["bre", "hun", "hye", "tam", "tel", "tur"]
    ORDER = {
        language: len(pd.read_csv(os.path.join(CLOZE_DIR, f"{code}.csv")))
        for language, code in LANGUAGES.items() if code not in already_done
    }
    # NOTE(review): this hard-coded override discards the ordering computed
    # just above, so only Czech and German are processed.  It looks like a
    # leftover from a manual re-run -- confirm whether it should be removed.
    ORDER = {"Czech": 0, "German": 1}
    for language in sorted(ORDER, key=ORDER.get):
        try:
            run(language)
        except Exception:
            # Keep going with the remaining languages, but show what failed.
            # Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit actually stop the run.
            print(f"Error with {language}")
            traceback.print_exc()
        else:
            print(f"Finished with {language}")
Example #6
0
    def __init__(self):
        """Build the translator window: helpers, actions, tray icon and menus.

        Loads the translator configuration and the history file from the
        module directory, sets up the generated UI, wires the input widget's
        signals, creates the actions and the system tray icon, shows the
        window, optionally installs a completer from ``dictionary.txt`` and
        fills both language combo boxes from ``LANGUAGES``.
        """
        super().__init__()
        # Imported here rather than at module level; the helper reports the
        # directory that all resource files below are resolved against.
        import module_locator
        self.module_path = module_locator.module_path()
        self.tr = Translator(os.path.join(self.module_path, 'config.ini'))
        self.history = History(os.path.join(self.module_path, 'history.txt'))
        # Five empty slots; how they are used is not visible in this method.
        self.threads = [None for _ in range(5)]
        self.setupUi(self)
        self.center()

        # Input widget: pasting triggers a translation, up/down walk the
        # history, and this window filters the widget's events.
        self.inputEdit.pasted.connect(self.translate)
        self.inputEdit.up_pressed.connect(self.navigate_history_forward)
        self.inputEdit.down_pressed.connect(self.navigate_history_backward)
        self.inputEdit.installEventFilter(self)

        # Actions shared between the tray menu and the menu bar.
        self.actionExit = QAction('Exit',
                                  self,
                                  shortcut='Ctrl+Shift+Q',
                                  triggered=self.close)
        self.actionHideOrShow = QAction('Hide',
                                        self,
                                        shortcut='Ctrl+Q',
                                        triggered=self.hide_or_show)
        self.actionTranslate = QAction('Translate',
                                       self,
                                       triggered=self.translate)
        # NOTE(review): 16777220 equals 0x01000004, which appears to be the
        # numeric value of Qt.Key_Return -- confirm and consider the named
        # constant.
        self.actionTranslate.setShortcuts(
            [16777220, Qt.CTRL + Qt.Key_Space, Qt.Key_Enter])
        self.translateButton.pressed.connect(self.translate)
        # Created without a parent and without a triggered handler here.
        self.actionSettings = QAction('Settings')
        selectInputAction = QAction('Focus input',
                                    self,
                                    shortcut='Ctrl+L',
                                    triggered=self.input_edit_set_focus)
        self.addAction(selectInputAction)

        # System tray icon with its own context menu.
        self.tray = QSystemTrayIcon(
            QIcon(os.path.join(self.module_path, 'icon.png')), self)
        # NOTE(review): trayMenu is a parentless local; verify it is kept
        # alive after __init__ returns (setContextMenu may not own it).
        trayMenu = QMenu()
        trayMenu.addAction(self.actionHideOrShow)
        trayMenu.addAction(self.actionSettings)
        trayMenu.addAction(self.actionExit)
        self.tray.setContextMenu(trayMenu)
        self.tray.show()

        # Menu bar entries.
        self.menuTranslator.addAction(self.actionTranslate)
        self.menuTranslator.addAction(self.actionHideOrShow)
        self.menuTranslator.addAction(self.actionExit)
        self.menuTranslator.addAction(self.actionSettings)
        self.menuBar.addAction(self.menuTranslator.menuAction())

        self.show()
        # Optional word completion: one stripped line of dictionary.txt per
        # suggestion; skipped silently when the file does not exist.
        dictionary_path = os.path.join(self.module_path, 'dictionary.txt')
        if os.path.exists(dictionary_path):
            with open(dictionary_path, "r", encoding='utf-8', newline='') as f:
                lines_list = list(map(str.strip, f.readlines()))
            completer = QCompleter(lines_list, self.inputEdit)
            completer.setCaseSensitivity(Qt.CaseInsensitive)
            self.inputEdit.setCompleter(completer)

        self.swapLangsButton.setIcon(
            QIcon(os.path.join(self.module_path, 'arrowupdown.png')))
        self.swapLangsButton.pressed.connect(self.swap_languages)

        # Both combo boxes show the language name and carry its code as
        # item data.
        for code, name in LANGUAGES.items():
            self.fromComboBox.addItem(name, userData=code)
            self.toComboBox.addItem(name, userData=code)