def load_directory_ilg(corpus_name, path, annotation_types, feature_system_path=None,
                       stop_check=None, call_back=None):
    """
    Loads a directory of interlinear gloss text files

    Parameters
    ----------
    corpus_name : str
        Name of corpus
    path : str
        Path to directory of text files
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the glosses.
        Can be generated through ``inspect_discourse_ilg``.
    feature_system_path : str, optional
        File path of FeatureMatrix binary to specify segments
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    SpontaneousSpeechCorpus
        Corpus containing Discourses corresponding to the text files
    """
    if call_back is not None:
        call_back('Finding files...')
        call_back(0, 0)
    file_tuples = []
    for root, subdirs, files in os.walk(path):
        for filename in files:
            # Only .txt files are treated as interlinear gloss files
            if not filename.lower().endswith('.txt'):
                continue
            file_tuples.append((root, filename))
    if call_back is not None:
        call_back('Parsing files...')
        call_back(0, len(file_tuples))
    corpus = SpontaneousSpeechCorpus(corpus_name, path)
    for i, t in enumerate(file_tuples):
        # Returns None (not the partial corpus) when the user cancels
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back('Parsing file {} of {}...'.format(i + 1, len(file_tuples)))
            call_back(i)
        root, filename = t
        name = os.path.splitext(filename)[0]
        # feature_system_path is deliberately not forwarded here; the feature
        # matrix is attached once to the whole corpus below
        d = load_discourse_ilg(name, os.path.join(root, filename),
                               annotation_types, corpus.lexicon,
                               None, stop_check, call_back)
        corpus.add_discourse(d)
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        corpus.lexicon.set_feature_matrix(feature_matrix)
        corpus.lexicon.specifier = modernize.modernize_specifier(corpus.lexicon.specifier)
    return corpus
def load_discourse_textgrid(corpus_name, path, annotation_types,
                            feature_system_path=None, stop_check=None, call_back=None):
    """
    Load a single discourse from a Praat TextGrid file.

    NOTE(review): a second ``load_discourse_textgrid`` with an extra
    ``support_corpus_path`` parameter appears later in this file and shadows
    this definition — confirm which one is intended to survive.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to TextGrid file
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the TextGrids.
        Can be generated through ``inspect_discourse_textgrid``.
    feature_system_path : str, optional
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the TextGrid file
    """
    # textgrid_to_data mutates annotation_types as a side effect; the
    # returned data object is not consumed directly below.
    data = textgrid_to_data(corpus_name, path, annotation_types,
                            call_back=call_back, stop_check=stop_check)
    wav_path = find_wav_path(path)
    discourse = data_to_discourse2(corpus_name, wav_path, annotation_types,
                                   stop_check=stop_check, call_back=call_back)
    if discourse is None:
        # Loading was cancelled part-way through
        return
    if feature_system_path is not None:
        specifier = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(specifier)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_discourse_transcription(corpus_name, path, annotation_types=None, lexicon=None,
                                 feature_system_path=None, stop_check=None, call_back=None):
    """
    Load a discourse from a text file containing running transcribed text.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to text file
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse text files
    lexicon : Corpus, optional
        Corpus to store Discourse word information
    feature_system_path : str, optional
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the text file

    Raises
    ------
    PCTOSError
        If ``feature_system_path`` is given but does not exist on disk.
    """
    # Fail fast on a bad feature-system path before doing any parsing work
    if feature_system_path is not None and not os.path.exists(feature_system_path):
        raise PCTOSError(
            "The feature path specified ({}) does not exist".format(feature_system_path))
    data = transcription_text_to_data(corpus_name, path, annotation_types,
                                      stop_check=stop_check, call_back=call_back)
    discourse = data_to_discourse2(corpus_name=corpus_name,
                                   wav_path=data.wav_path,
                                   annotation_types=annotation_types,
                                   stop_check=stop_check,
                                   call_back=call_back)
    if feature_system_path is not None:
        matrix = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(matrix)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_discourse_textgrid(corpus_name, path, annotation_types, feature_system_path=None,
                            support_corpus_path=None, stop_check=None, call_back=None):
    """
    Load a discourse from a TextGrid file, optionally using a support corpus
    for word lookup.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to TextGrid file
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the TextGrids.
        Can be generated through ``inspect_discourse_textgrid``.
    feature_system_path : str, optional
        Full path to pickled FeatureMatrix to use with the Corpus
    support_corpus_path : str or Corpus, optional
        Either an already-loaded Corpus (as passed by
        ``load_directory_textgrid`` to avoid reloading per file) or a path to
        a pickled corpus to load.
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the TextGrid file
    """
    # Side effect: textgrid_to_data modifies annotation_types in place.
    data = textgrid_to_data(corpus_name, path, annotation_types,
                            call_back=call_back, stop_check=stop_check)
    wav_path = find_wav_path(path)
    support = None
    if support_corpus_path is not None:
        if isinstance(support_corpus_path, Corpus):
            # Pre-loaded corpus handed in by load_directory_textgrid; using it
            # directly avoids unpickling the corpus once per file.
            support = support_corpus_path
        else:
            # A string path to a pickled corpus
            support = load_binary(support_corpus_path)
    discourse = data_to_discourse2(corpus_name, wav_path,
                                   annotation_types=annotation_types,
                                   support_corpus=support,
                                   stop_check=stop_check,
                                   call_back=call_back)
    if feature_system_path is not None:
        specifier = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(specifier)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_discourse_ilg(corpus_name, path, annotation_types, lexicon=None,
                       feature_system_path=None, stop_check=None, call_back=None):
    """
    Load a discourse from a text file containing interlinear glosses.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to text file
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the glosses.
        Can be generated through ``inspect_discourse_ilg``.
    lexicon : Corpus, optional
        Corpus to store Discourse word information
    feature_system_path : str, optional
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the text file
    """
    # Presumably ilg_to_data mutates annotation_types (as textgrid_to_data is
    # documented to elsewhere in this file); its return value is not used
    # below — TODO confirm.
    data = ilg_to_data(corpus_name, path, annotation_types, stop_check, call_back)
    discourse = data_to_discourse2(corpus_name=corpus_name,
                                   annotation_types=annotation_types,
                                   stop_check=stop_check,
                                   call_back=call_back)
    if discourse is None:
        # Cancelled via stop_check
        return
    if feature_system_path is not None:
        specifier = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(specifier)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_discourse_ilg(corpus_name, path, annotation_types, lexicon=None,
                       feature_system_path=None, stop_check=None, call_back=None):
    """
    Build a Discourse from an interlinear-gloss text file.

    NOTE(review): this duplicates an identical ``load_discourse_ilg`` defined
    earlier in the file; the later definition wins at import time.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to text file
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the glosses.
        Can be generated through ``inspect_discourse_ilg``.
    lexicon : Corpus, optional
        Corpus to store Discourse word information
    feature_system_path : str, optional
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the text file
    """
    data = ilg_to_data(corpus_name, path, annotation_types, stop_check, call_back)
    discourse = data_to_discourse2(corpus_name=corpus_name,
                                   annotation_types=annotation_types,
                                   stop_check=stop_check, call_back=call_back)
    if discourse is None:
        return
    if feature_system_path is None:
        return discourse
    feature_matrix = load_binary(feature_system_path)
    discourse.lexicon.set_feature_matrix(feature_matrix)
    discourse.lexicon.specifier = modernize.modernize_specifier(
        discourse.lexicon.specifier)
    return discourse
def load_directory_ilg(corpus_name, path, annotation_types, feature_system_path=None,
                       stop_check=None, call_back=None):
    """
    Loads a directory of interlinear gloss text files

    NOTE(review): duplicates an earlier ``load_directory_ilg`` in this file;
    this later definition is the one in effect.

    Parameters
    ----------
    corpus_name : str
        Name of corpus
    path : str
        Path to directory of text files
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the glosses.
        Can be generated through ``inspect_discourse_ilg``.
    feature_system_path : str, optional
        File path of FeatureMatrix binary to specify segments
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    SpontaneousSpeechCorpus
        Corpus containing Discourses corresponding to the text files
    """
    if call_back is not None:
        call_back('Finding files...')
        call_back(0, 0)
    file_tuples = []
    for root, subdirs, files in os.walk(path):
        for filename in files:
            if not filename.lower().endswith('.txt'):
                continue
            file_tuples.append((root, filename))
    if call_back is not None:
        call_back('Parsing files...')
        call_back(0, len(file_tuples))
    corpus = SpontaneousSpeechCorpus(corpus_name, path)
    for i, t in enumerate(file_tuples):
        # Cancellation returns None rather than a partial corpus
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back('Parsing file {} of {}...'.format(i + 1, len(file_tuples)))
            call_back(i)
        root, filename = t
        name = os.path.splitext(filename)[0]
        # Feature system is attached once to the finished corpus below, so it
        # is not passed through to the per-file loader
        d = load_discourse_ilg(name, os.path.join(root, filename),
                               annotation_types, corpus.lexicon,
                               None, stop_check, call_back)
        corpus.add_discourse(d)
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        corpus.lexicon.set_feature_matrix(feature_matrix)
        corpus.lexicon.specifier = modernize.modernize_specifier(
            corpus.lexicon.specifier)
    return corpus
def load_discourse_textgrid(corpus_name, path, annotation_types,
                            feature_system_path=None, support_corpus_path=None,
                            stop_check=None, call_back=None):
    """
    Load a discourse from a TextGrid file

    NOTE(review): duplicates an earlier ``load_discourse_textgrid`` in this
    file; being defined later, this variant (with ``support_corpus_path``) is
    the one in effect at import time.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to TextGrid file
    annotation_types : list of AnnotationType
        List of AnnotationType specifying how to parse the TextGrids.
        Can be generated through ``inspect_discourse_textgrid``.
    feature_system_path : str
        Full path to pickled FeatureMatrix to use with the Corpus
    support_corpus_path : str or Corpus, optional
        Either an already-loaded Corpus or a path to a pickled corpus used to
        support word lookup.
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the TextGrid file
    """
    data = textgrid_to_data(corpus_name, path, annotation_types,
                            call_back=call_back, stop_check=stop_check)
    #textgrid_to_data has side-effects that change annotation_types
    wav_path = find_wav_path(path)
    if support_corpus_path is not None:
        if isinstance(support_corpus_path, Corpus):
            #the corpus is 'preloaded' if this function is called by load_directory_textgrid
            #otherwise the corpus has to be loaded once per file in a directory, which could be slow
            support = support_corpus_path
        else:
            #otherwise, it's a string representing a path to the corpus
            support = load_binary(support_corpus_path)
    else:
        support = None
    discourse = data_to_discourse2(corpus_name, wav_path,
                                   annotation_types=annotation_types,
                                   support_corpus=support,
                                   stop_check=stop_check, call_back=call_back)
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(feature_matrix)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def check_feature_coverage_csv(corpus_name, path, delimiter, annotation_types=None,
                               feature_system_path=None, stop_check=None, call_back=None):
    """
    Collect segments in a column-delimited corpus file that are missing from
    the given feature system.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus (unused in the check itself)
    path : str
        Full path to the column-delimited text file
    delimiter : str
        Character used to split lines into columns
    annotation_types : list of AnnotationType, optional
        How to parse each column; inspected from the file when None
    feature_system_path : str, optional
        Path to a pickled FeatureMatrix binary
    stop_check : callable, optional
        Unused here; kept for signature parity with the other loaders
    call_back : callable, optional
        Unused here; kept for signature parity with the other loaders

    Returns
    -------
    set
        Transcription segments found in the file that have no entry in the
        feature system; empty when no feature system is available.

    Raises
    ------
    DelimiterError
        If splitting the header row on ``delimiter`` yields a single column.
    """
    # Bug fix: previously feature_matrix was only assigned inside this guard,
    # so a missing/absent feature system caused a NameError further down.
    if feature_system_path is None or not os.path.exists(feature_system_path):
        return set()
    feature_matrix = load_binary(feature_system_path)
    feature_matrix = modernize.modernize_specifier(feature_matrix)
    if annotation_types is None:
        annotation_types, delimiter = inspect_csv(path, coldelim=delimiter)
    for a in annotation_types:
        a.reset()
    missing = set()
    with open(path, encoding='utf-8-sig') as f:
        headers = f.readline()
        headers = headers.split(delimiter)
        if len(headers) == 1:
            # Bug fix: the original message contained '\n\Check', leaving a
            # stray backslash before 'Check' in the rendered error.
            raise DelimiterError('Could not parse the corpus.\n'
                                 'Check that the column delimiter you typed in matches '
                                 'the one used in the file.')
        headers = annotation_types
        for line in f.readlines():
            line = line.strip()
            if not line:
                continue
            for k, v in zip(headers, line.split(delimiter)):
                v = v.strip()
                if k.attribute.att_type != 'tier':
                    continue
                ignored = k.ignored_characters
                if ignored is not None:
                    v = ''.join(x for x in v if x not in ignored)
                sd = k.syllable_delimiter
                syllables = v.split(sd) if sd is not None else [v]
                td = k.trans_delimiter
                # Stress and tone marks are suprasegmental and are stripped
                # before coverage is checked
                supra_spec = set(k.stress_specification.keys()) | set(k.tone_specification.keys())
                for syllable in syllables:
                    syllable = ''.join(x for x in syllable if x not in supra_spec)
                    if td is None:
                        if k.digraph_pattern is not None:
                            segs = k.digraph_pattern.findall(syllable)
                        else:
                            segs = [x for x in syllable]
                    else:
                        segs = syllable.split(td)
                    for seg in segs:
                        if seg == '':
                            continue
                        if seg not in feature_matrix.segments:
                            missing.add(seg)
    # Return the result instead of the previous leftover debug print
    return missing
def load_corpus_csv(corpus_name, path, delimiter, annotation_types=None,
                    feature_system_path=None, stop_check=None, call_back=None):
    """
    Load a corpus from a column-delimited text file

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    path : str
        Full path to text file
    delimiter : str
        Character to use for spliting lines into columns
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse text files
    feature_system_path : str
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    Corpus
        Corpus object generated from the text file

    Raises
    ------
    DelimiterError
        If the header row does not split into multiple columns with
        ``delimiter``.
    """
    check_feature_coverage_csv(corpus_name, path, delimiter, annotation_types,
                               feature_system_path, stop_check, call_back)
    corpus = Corpus(corpus_name)
    # Bug fix: feature_matrix is passed to parse_transcription below; without
    # this default it was an unbound name whenever no feature system was given.
    feature_matrix = None
    if feature_system_path is not None and os.path.exists(feature_system_path):
        feature_matrix = load_binary(feature_system_path)
        feature_matrix = modernize.modernize_specifier(feature_matrix)
        corpus.set_feature_matrix(feature_matrix)
    if annotation_types is None:
        annotation_types, delimiter = inspect_csv(path, coldelim=delimiter)
    for a in annotation_types:
        a.reset()
    if call_back is not None:
        call_back('Loading...')
        call_back(0, 0)
    cur = 0
    with open(path, encoding='utf-8-sig') as f:
        headers = f.readline()
        headers = headers.split(delimiter)
        if len(headers) == 1:
            # Bug fix: message previously contained the broken escape
            # '\n\Check', which rendered a literal backslash before 'Check'.
            raise DelimiterError('Could not parse the corpus.\n'
                                 'Check that the column delimiter you typed in matches '
                                 'the one used in the file.')
        headers = annotation_types
        for a in headers:
            corpus.add_attribute(a.attribute)
        # NOTE(review): starting at True makes the delimiter check at the end
        # unreachable; upstream intent was probably to start at False — TODO
        # confirm before changing behavior.
        trans_check = True
        for line in f.readlines():
            if stop_check is not None and stop_check():
                return
            if call_back is not None:
                cur += 1
                call_back(cur)
            line = line.strip()
            if not line:
                # blank or just a newline
                continue
            d = {}
            for k, v in zip(headers, line.split(delimiter)):
                v = v.strip()
                if k.attribute.att_type == 'tier':
                    # trans is a list of BaseAnnotation
                    trans = parse_transcription(v, k, feature_matrix=feature_matrix,
                                                corpus=corpus)
                    if not trans_check and len(trans) > 1:
                        trans_check = True
                    d[k.attribute.name] = (k.attribute, trans)
                else:
                    d[k.attribute.name] = (k.attribute, v)
            word = Word(**d)
            if word.transcription:
                # transcriptions can have phonetic symbol delimiters
                if not word.spelling:
                    word.spelling = ''.join(map(str, word.transcription))
            corpus.add_word(word, allow_duplicates=True)
    if corpus.specifier is not None:
        corpus.inventory.update_features(corpus.specifier)
    if corpus.has_transcription and any(len(word.transcription) > 1 for word in corpus):
        if not trans_check:
            raise DelimiterError('Could not parse transcriptions with that delimiter. '
                                 '\nCheck that the transcription delimiter you typed '
                                 'in matches the one used in the file.')
    if stop_check is not None and stop_check():
        return
    return corpus
def load_discourse_multiple_files(corpus_name, word_path, phone_path, dialect,
                                  annotation_types=None, lexicon=None,
                                  feature_system_path=None, stop_check=None,
                                  call_back=None):
    """
    Load a discourse from a words file plus a phones file

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    word_path : str
        Full path to words text file
    phone_path : str
        Full path to phones text file (NOTE(review): not referenced in this
        body — presumably consumed inside ``read_words``; TODO confirm)
    dialect : str
        Currently, only 'buckeye'
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse the glosses.
        Auto-generated based on dialect.
    lexicon : Corpus, optional
        Corpus to store Discourse word information
    feature_system_path : str
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the text file
    """
    # Discourse name comes from the words file's base name
    name = os.path.splitext(os.path.split(word_path)[1])[0]
    discourse_kwargs = {'name': name,
                        'wav_path': find_wav_path(word_path),
                        'other_attributes': list()}
    # Route each annotation type into the Discourse constructor's slots:
    # default orthography/transcription get dedicated keys, everything else
    # is collected under 'other_attributes'
    for at in annotation_types:
        if at.name == 'Orthography (default)':
            discourse_kwargs['spelling_name'] = at.attribute  #.output_name
        elif at.name == 'Transcription (default)':
            discourse_kwargs['transcription_name'] = at.attribute  #.output_name
        elif at.name == 'Other (character)' or at.attribute.att_type in ('tier', 'spelling'):
            discourse_kwargs['other_attributes'].append(at.attribute)
    discourse = Discourse(discourse_kwargs)
    words = read_words(word_path, dialect)
    # ind is a running token index used as a fallback begin/end time
    ind = 0
    for w in words:
        word_kwargs = {at.output_name: (at.attribute, w[at.output_name])
                       for at in annotation_types}
        word = Word(**word_kwargs)
        word_token_kwargs = dict()
        for at in annotation_types:
            if at.ignored:
                continue
            word_token_kwargs[at.output_name] = (at.attribute, w[at.output_name])
            word_token_kwargs['word'] = word
            if at.attribute.att_type == 'tier':
                if at.attribute.is_default:
                    # Fall back to the token index when the file has no times
                    begin = w['begin']
                    end = w['end']
                    word_token_kwargs['begin'] = begin if begin is not None else ind
                    word_token_kwargs['end'] = end if end is not None else ind + 1
                if at.token:
                    word_token_kwargs['_transcription'] = (at.attribute, w['transcription'])
        word_token = WordToken(**word_token_kwargs)
        word.wordtokens.append(word_token)
        # The type goes into the lexicon, the token into the discourse
        discourse.lexicon.add_word(word)
        discourse.add_word(word_token)
        ind += 1
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(feature_matrix)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_directory_multiple_files(corpus_name, path, dialect, annotation_types=None,
                                  feature_system_path=None, stop_check=None,
                                  call_back=None):
    """
    Loads a directory of corpus standard files (separated into words files
    and phones files)

    Parameters
    ----------
    corpus_name : str
        Name of corpus
    path : str
        Path to directory of text files
    dialect : str
        Currently only 'buckeye'
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse the glosses.
        Auto-generated based on dialect.
    feature_system_path : str, optional
        File path of FeatureMatrix binary to specify segments
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    SpontaneousSpeechCorpus
        Corpus containing Discourses corresponding to the text files
    """
    if call_back is not None:
        call_back('Finding files...')
        call_back(0, 0)
    file_tuples = []
    for root, subdirs, files in os.walk(path):
        for filename in files:
            if stop_check is not None and stop_check():
                return
            if not (filename.lower().endswith('.words')
                    or filename.lower().endswith('.wrd')):
                continue
            file_tuples.append((root, filename))
    if call_back is not None:
        call_back('Parsing files...')
        call_back(0, len(file_tuples))
    corpus = SpontaneousSpeechCorpus(corpus_name, path)
    for i, t in enumerate(file_tuples):
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back('Parsing file {} of {}...'.format(i + 1, len(file_tuples)))
            call_back(i)
        root, filename = t
        name, ext = os.path.splitext(filename)
        if ext == '.words':
            phone_ext = '.phones'
        # NOTE(review): '.wrd' files are collected above but no phone
        # extension is set for them, so phone_ext may be unbound (or stale
        # from a previous iteration) here — TODO confirm intended handling.
        word_path = os.path.join(root, filename)
        phone_path = os.path.splitext(word_path)[0] + phone_ext
        try:
            d = load_discourse_multiple_files(name, word_path, phone_path,
                                              dialect, annotation_types,
                                              corpus.lexicon,
                                              feature_system_path,
                                              stop_check, None)
            corpus.add_discourse(d)
        except ValueError:
            # Best-effort: a malformed participant file is reported and skipped
            print('Error importing for participant ' + name)
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        corpus.lexicon.set_feature_matrix(feature_matrix)
        corpus.lexicon.specifier = modernize.modernize_specifier(
            corpus.lexicon.specifier)
    return corpus
def load_discourse_multiple_files(corpus_name, word_path, phone_path, dialect,
                                  annotation_types=None, lexicon=None,
                                  feature_system_path=None, stop_check=None,
                                  call_back=None):
    """
    Load a discourse from paired words/phones text files

    NOTE(review): duplicates an identical function defined earlier in this
    file; this later definition is the one in effect.

    Parameters
    ----------
    corpus_name : str
        Informative identifier to refer to corpus
    word_path : str
        Full path to words text file
    phone_path : str
        Full path to phones text file (not referenced in this body —
        presumably used inside ``read_words``; TODO confirm)
    dialect : str
        Currently, only 'buckeye'
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse the glosses.
        Auto-generated based on dialect.
    lexicon : Corpus, optional
        Corpus to store Discourse word information
    feature_system_path : str
        Full path to pickled FeatureMatrix to use with the Corpus
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    Discourse
        Discourse object generated from the text file
    """
    # Discourse is named after the words file
    name = os.path.splitext(os.path.split(word_path)[1])[0]
    discourse_kwargs = {'name': name, 'wav_path': find_wav_path(word_path),
                        'other_attributes': list()}
    # Map annotation types onto the Discourse constructor's expected keys
    for at in annotation_types:
        if at.name == 'Orthography (default)':
            discourse_kwargs['spelling_name'] = at.attribute#.output_name
        elif at.name == 'Transcription (default)':
            discourse_kwargs['transcription_name'] = at.attribute#.output_name
        elif at.name == 'Other (character)' or at.attribute.att_type in ('tier', 'spelling'):
            discourse_kwargs['other_attributes'].append(at.attribute)
    discourse = Discourse(discourse_kwargs)
    words = read_words(word_path, dialect)
    # Running token index; doubles as fallback begin/end when times are absent
    ind = 0
    for w in words:
        word_kwargs = {at.output_name: (at.attribute, w[at.output_name])
                       for at in annotation_types}
        word = Word(**word_kwargs)
        word_token_kwargs = dict()
        for at in annotation_types:
            if at.ignored:
                continue
            word_token_kwargs[at.output_name] = (at.attribute, w[at.output_name])
            word_token_kwargs['word'] = word
            if at.attribute.att_type == 'tier':
                if at.attribute.is_default:
                    begin = w['begin']
                    end = w['end']
                    word_token_kwargs['begin'] = begin if begin is not None else ind
                    word_token_kwargs['end'] = end if end is not None else ind + 1
                if at.token:
                    word_token_kwargs['_transcription'] = (at.attribute, w['transcription'])
        word_token = WordToken(**word_token_kwargs)
        word.wordtokens.append(word_token)
        # Word type into the lexicon, token into the discourse timeline
        discourse.lexicon.add_word(word)
        discourse.add_word(word_token)
        ind += 1
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        discourse.lexicon.set_feature_matrix(feature_matrix)
        discourse.lexicon.specifier = modernize.modernize_specifier(
            discourse.lexicon.specifier)
    return discourse
def load_directory_multiple_files(corpus_name, path, dialect, annotation_types=None,
                                  feature_system_path=None, stop_check=None,
                                  call_back=None):
    """
    Loads a directory of corpus standard files (separated into words files
    and phones files)

    NOTE(review): duplicates an identical function defined earlier in this
    file; this later definition is the one in effect.

    Parameters
    ----------
    corpus_name : str
        Name of corpus
    path : str
        Path to directory of text files
    dialect : str
        Currently only 'buckeye'
    annotation_types : list of AnnotationType, optional
        List of AnnotationType specifying how to parse the glosses.
        Auto-generated based on dialect.
    feature_system_path : str, optional
        File path of FeatureMatrix binary to specify segments
    stop_check : callable or None
        Optional function to check whether to gracefully terminate early
    call_back : callable or None
        Optional function to supply progress information during the loading

    Returns
    -------
    SpontaneousSpeechCorpus
        Corpus containing Discourses corresponding to the text files
    """
    if call_back is not None:
        call_back('Finding files...')
        call_back(0, 0)
    file_tuples = []
    for root, subdirs, files in os.walk(path):
        for filename in files:
            if stop_check is not None and stop_check():
                return
            if not (filename.lower().endswith('.words')
                    or filename.lower().endswith('.wrd')):
                continue
            file_tuples.append((root, filename))
    if call_back is not None:
        call_back('Parsing files...')
        call_back(0, len(file_tuples))
    corpus = SpontaneousSpeechCorpus(corpus_name, path)
    for i, t in enumerate(file_tuples):
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back('Parsing file {} of {}...'.format(i + 1, len(file_tuples)))
            call_back(i)
        root, filename = t
        name, ext = os.path.splitext(filename)
        if ext == '.words':
            phone_ext = '.phones'
        # NOTE(review): '.wrd' files pass the filter above but never assign
        # phone_ext, leaving it unbound or stale here — TODO confirm intended
        # handling for non-Buckeye extensions.
        word_path = os.path.join(root, filename)
        phone_path = os.path.splitext(word_path)[0] + phone_ext
        try:
            d = load_discourse_multiple_files(name, word_path, phone_path,
                                              dialect, annotation_types,
                                              corpus.lexicon,
                                              feature_system_path,
                                              stop_check, None)
            corpus.add_discourse(d)
        except ValueError:
            # Best-effort: report and skip files that fail to parse
            print('Error importing for participant ' + name)
    if feature_system_path is not None:
        feature_matrix = load_binary(feature_system_path)
        corpus.lexicon.set_feature_matrix(feature_matrix)
        corpus.lexicon.specifier = modernize.modernize_specifier(
            corpus.lexicon.specifier)
    return corpus