예제 #1
0
def test_load_spelling_ignore(text_test_dir):
    """Load the spelling fixture while ignoring "'" and "." characters.

    The result of ``inspect_discourse_spelling`` has the ignored characters
    set on its first element before being handed to the loader; the lexicon
    frequencies of 'ab' and 'cabd' are then checked.
    """
    fixture_path = os.path.join(text_test_dir, 'test_text_spelling.txt')
    inspected = inspect_discourse_spelling(fixture_path)
    inspected[0].ignored_characters = {"'", '.'}
    corpus = load_discourse_spelling('test', fixture_path, inspected)

    # Expected frequency for each spelling, checked in this order.
    expected_frequencies = {'ab': 3, 'cabd': 1}
    for spelling, frequency in expected_frequencies.items():
        assert corpus.lexicon[spelling].frequency == frequency
def test_load_spelling_ignore(text_test_dir):
    """Check lexicon frequencies when "'" and "." are ignored on load."""
    source_file = os.path.join(text_test_dir, 'test_text_spelling.txt')

    # Configure the first inspected entry to drop apostrophes and periods.
    inspection = inspect_discourse_spelling(source_file)
    first_entry = inspection[0]
    first_entry.ignored_characters = {"'", '.'}

    loaded = load_discourse_spelling('test', source_file, inspection)

    for word, count in (('ab', 3), ('cabd', 1)):
        assert loaded.lexicon[word].frequency == count
def test_export_spelling(export_test_dir, unspecified_test_corpus):
    """Export a generated discourse as spelling text and reload it.

    Every key of ``unspecified_test_corpus`` must be present in the reloaded
    lexicon with the same ``spelling`` and ``frequency``.
    """
    discourse = generate_discourse(unspecified_test_corpus)
    target = os.path.join(export_test_dir, 'test_export_spelling.txt')
    export_discourse_spelling(discourse, target, single_line=False)

    reloaded = load_discourse_spelling('test', target)
    for key in unspecified_test_corpus.keys():
        actual_spelling = reloaded.lexicon[key].spelling
        assert actual_spelling == unspecified_test_corpus[key].spelling
        actual_frequency = reloaded.lexicon[key].frequency
        assert actual_frequency == unspecified_test_corpus[key].frequency
예제 #4
0
def test_export_spelling(export_test_dir, unspecified_test_corpus):
    """Round-trip the test corpus through the spelling exporter and loader."""
    out_file = os.path.join(export_test_dir, 'test_export_spelling.txt')
    export_discourse_spelling(
        generate_discourse(unspecified_test_corpus),
        out_file,
        single_line=False,
    )

    round_tripped = load_discourse_spelling('test', out_file)
    for word_key in unspecified_test_corpus.keys():
        # Compare one attribute at a time, reloaded side first.
        for attribute in ('spelling', 'frequency'):
            got = getattr(round_tripped.lexicon[word_key], attribute)
            want = getattr(unspecified_test_corpus[word_key], attribute)
            assert got == want
예제 #5
0
    def run(self):
        """Import a corpus as described by ``self.kwargs`` and emit the result.

        ``text_type`` and ``isDirectory`` are popped from ``self.kwargs``; the
        remaining entries are forwarded as keyword arguments to the loader
        selected by those two values.

        Emits ``dataReady`` with the loaded corpus on success,
        ``errorEncountered`` when loading raises (non-``PCTError`` exceptions
        are wrapped in ``PCTPythonError`` first), or ``finishedCancelling``
        when ``self.stopped`` is set once loading has returned.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(
            textType, self.kwargs['corpus_name']))
        # The format string previously had no placeholder, so the path was
        # silently dropped from the log message.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                # CSV import has no directory variant.
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                # These loaders take the text type as their ``dialect``.
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # Without this branch ``corpus`` stays unbound and the final
                # emit raises outside the try; raise here so the failure is
                # reported through ``errorEncountered`` instead.
                raise ValueError(
                    'Unsupported text type: {!r}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
예제 #6
0
    def run(self):
        """Load the corpus requested in ``self.kwargs`` and signal the outcome.

        Pops ``text_type`` and ``isDirectory`` from ``self.kwargs`` and passes
        what remains as keyword arguments to the matching loader.

        Signals emitted:
            dataReady: with the loaded corpus, on success.
            errorEncountered: with the ``PCTError`` raised by the loader, or
                with a ``PCTPythonError`` wrapping any other exception.
            finishedCancelling: when ``self.stopped`` is set after loading.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(textType, self.kwargs['corpus_name']))
        # Fixed: the original format string lacked '{}', so the path never
        # appeared in the log.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                # Only a single-file loader is used for CSV.
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                # The text type is forwarded to the loader as ``dialect``.
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # An unknown type used to fall through with ``corpus``
                # unbound, failing at the final emit outside this try.
                # Raising here routes it to the generic handler below.
                raise ValueError(
                    'Unsupported text type: {!r}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
def test_load_spelling_no_ignore(text_test_dir):
    """Load the spelling fixture with default settings and check 'ab'."""
    fixture_path = os.path.join(text_test_dir, 'test_text_spelling.txt')
    corpus = load_discourse_spelling('test', fixture_path)

    ab_entry = corpus.lexicon['ab']
    assert ab_entry.frequency == 2
예제 #8
0
def test_load_spelling_no_ignore(text_test_dir):
    """With no third argument to the loader, 'ab' must have frequency 2."""
    loaded = load_discourse_spelling(
        'test',
        os.path.join(text_test_dir, 'test_text_spelling.txt'),
    )

    observed = loaded.lexicon['ab'].frequency
    assert observed == 2