def test_ilg_mismatched(ilg_test_dir):
    """Loading ``test_mismatched.txt`` as ILG must raise ILGWordMismatchError."""
    path = os.path.join(ilg_test_dir, 'test_mismatched.txt')

    # Spelling line: anchor tier.
    spelling_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    # Transcription line: base tier, segments separated by '.'.
    tier_attribute = Attribute('transcription', 'tier')
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=tier_attribute)
    transcription_type.trans_delimiter = '.'
    annotation_types = [spelling_type, transcription_type]

    with pytest.raises(ILGWordMismatchError):
        load_discourse_ilg('test', path, annotation_types)
def test_export_ilg(export_test_dir, unspecified_test_corpus):
    """Export a generated discourse as ILG, reload it, and compare lexicons.

    Every key of the source corpus must come back with the same spelling,
    transcription and frequency, and the reloaded lexicon as a whole must
    compare equal to the source corpus.
    """
    source_discourse = generate_discourse(unspecified_test_corpus)
    target = os.path.join(export_test_dir, 'test_export_ilg.txt')
    export_discourse_ilg(source_discourse, target)

    reloaded = load_discourse_ilg('test', target)

    checked_fields = ('spelling', 'transcription', 'frequency')
    for key in unspecified_test_corpus.keys():
        for field in checked_fields:
            found = getattr(reloaded.lexicon[key], field)
            wanted = getattr(unspecified_test_corpus[key], field)
            assert found == wanted
    assert reloaded.lexicon == unspecified_test_corpus
Esempio n. 3
0
    def run(self):
        """Import a corpus as described by ``self.kwargs`` and emit the outcome.

        ``text_type`` and ``isDirectory`` are popped from ``self.kwargs``;
        everything left is forwarded as keyword arguments to the loader
        selected by those two values.

        Emits ``dataReady`` with the loaded corpus on success,
        ``errorEncountered`` with the error if loading fails (exceptions
        other than ``PCTError`` are wrapped in ``PCTPythonError``), or
        ``finishedCancelling`` when ``self.stopped`` is set after loading.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(
            textType, self.kwargs['corpus_name']))
        # The format string needs a placeholder; without it the path was
        # silently dropped from the log line.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                # The multiple-files loaders receive the format via 'dialect'.
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # Without this branch ``corpus`` would be unbound at the
                # final emit; raising here routes the problem through the
                # normal error signal instead.
                raise ValueError(
                    'Unsupported text type: {}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
Esempio n. 4
0
    def run(self):
        """Load the corpus requested in ``self.kwargs`` and signal the result.

        The ``text_type`` and ``isDirectory`` entries are removed from
        ``self.kwargs`` and used to pick a loader; the remaining entries are
        passed to that loader as keyword arguments.

        Signals emitted:
            ``dataReady`` -- with the corpus, when loading succeeds.
            ``errorEncountered`` -- with the error, when loading raises
                (anything that is not a ``PCTError`` is wrapped in
                ``PCTPythonError`` first).
            ``finishedCancelling`` -- when ``self.stopped`` is set once the
                loader has returned.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(textType, self.kwargs['corpus_name']))
        # A '{}' placeholder is required here; the previous format string
        # had none, so the path never reached the log.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                # The format name is handed to the loader as 'dialect'.
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # An unrecognised type used to fall through and fail with an
                # unbound ``corpus`` at the final emit; report it through
                # the error signal instead.
                raise ValueError(
                    'Unsupported text type: {}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
Esempio n. 5
0
def test_export_ilg(export_test_dir, unspecified_test_corpus):
    """Round-trip a discourse through ILG export and import.

    The discourse built from ``unspecified_test_corpus`` is written to
    ``test_export_ilg.txt`` and read back; each key's spelling,
    transcription and frequency must match the source corpus, and the
    reloaded lexicon must equal the source corpus.
    """
    discourse = generate_discourse(unspecified_test_corpus)
    ilg_file = os.path.join(export_test_dir, 'test_export_ilg.txt')
    export_discourse_ilg(discourse, ilg_file)

    round_tripped = load_discourse_ilg('test', ilg_file)

    for word_key in unspecified_test_corpus.keys():
        for attr_name in ('spelling', 'transcription', 'frequency'):
            actual = getattr(round_tripped.lexicon[word_key], attr_name)
            expected = getattr(unspecified_test_corpus[word_key], attr_name)
            assert actual == expected
    assert round_tripped.lexicon == unspecified_test_corpus
def test_ilg_basic(ilg_test_dir):
    """Load ``test_basic.txt`` as ILG and check that 'a' has frequency 2."""
    path = os.path.join(ilg_test_dir, 'test_basic.txt')

    # Tier attribute for the transcription line, delimited by '.'.
    tier_attribute = Attribute('transcription', 'tier')
    tier_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=tier_attribute)
    transcription_type.trans_delimiter = '.'

    loaded = load_discourse_ilg(
        'test', path, [spelling_type, transcription_type])
    print(loaded.words)
    print(loaded.lexicon.words)
    entry = loaded.lexicon.find('a')
    assert entry.frequency == 2
Esempio n. 7
0
def test_ilg_mismatched(ilg_test_dir):
    """Expect ILGWordMismatchError when loading ``test_mismatched.txt``."""
    ilg_file = os.path.join(ilg_test_dir, 'test_mismatched.txt')

    anchor_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    base_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True,
        attribute=Attribute('transcription', 'tier'))
    # Segments on the transcription line are separated by '.'.
    base_type.trans_delimiter = '.'

    with pytest.raises(ILGWordMismatchError):
        load_discourse_ilg('test', ilg_file, [anchor_type, base_type])
Esempio n. 8
0
def test_ilg_basic(ilg_test_dir):
    """Import ``test_basic.txt`` as ILG; the word 'a' must have frequency 2."""
    ilg_file = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    anchor_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    base_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=transcription_attribute)
    # Segments on the transcription line are separated by '.'.
    base_type.trans_delimiter = '.'
    annotation_types = [anchor_type, base_type]

    result = load_discourse_ilg('test', ilg_file, annotation_types)
    print(result.words)
    print(result.lexicon.words)
    word_a = result.lexicon.find('a')
    assert word_a.frequency == 2