コード例 #1
0
ファイル: pct_corpus.py プロジェクト: FieldDB/CorpusTools
def main():
    """Create a corpus from the CSV file named on the command line and save it.

    The CSV path is first used exactly as given; the corpus is then handed to
    ``save_binary`` under the name returned by ``path_leaf`` plus ``.corpus``.
    If that attempt raises ``FileNotFoundError``, the CSV file (and the feature
    file, when one was supplied) are looked up relative to the directory that
    contains this script, and the output path is the CSV path with its
    extension replaced by ``.corpus``.

    Raises:
        FileNotFoundError: if the script-relative fallback fails as well.
    """

    #### Parse command-line arguments
    parser = argparse.ArgumentParser(description = \
             'Phonological CorpusTools: corpus object creation CL interface')
    parser.add_argument('csv_file_name', help='Name of input CSV file')
    parser.add_argument('-f', '--feature_file_name', default = '', type=str, help='Name of input feature file')
    parser.add_argument('-d', '--delimiter', default='\t', type=str, help='Character that delimits columns in the input file')
    parser.add_argument('-t', '--trans_delimiter', default='', type=str, help='Character that delimits segments in the input file')

    args = parser.parse_args()

    ####

    # Turn escape sequences typed on the command line (e.g. "\t") into the
    # characters they denote.
    delimiter = codecs.getdecoder("unicode_escape")(args.delimiter)[0]

    try: # Path usable exactly as given
        filename = path_leaf(os.path.splitext(args.csv_file_name)[0])
        corpus = load_corpus_csv(args.csv_file_name, args.csv_file_name,
                delimiter, args.trans_delimiter, args.feature_file_name)
        save_binary(corpus, filename+'.corpus')
    except FileNotFoundError:
        # Fall back to paths relative to this script's directory.
        # os.path.join picks the right separator, so one attempt covers
        # both Unix and Windows.
        script_dir = os.path.dirname(os.path.realpath(__file__))
        csv_path = os.path.join(script_dir, args.csv_file_name)
        # Keep an unspecified feature file empty, as in the first attempt,
        # instead of turning it into the bare script directory.
        feature_path = args.feature_file_name
        if feature_path:
            feature_path = os.path.join(script_dir, feature_path)
        filename = os.path.splitext(csv_path)[0]
        corpus = load_corpus_csv(args.csv_file_name, csv_path,
                delimiter, args.trans_delimiter, feature_path)
        save_binary(corpus, filename+'.corpus')
コード例 #2
0
def test_corpus_csv(csv_test_dir, unspecified_test_corpus):
    """Loading example.txt raises with a tab delimiter and succeeds with a comma."""
    csv_path = os.path.join(csv_test_dir, 'example.txt')

    # A tab column delimiter is expected to raise DelimiterError for this file.
    with pytest.raises(DelimiterError):
        load_corpus_csv('example', csv_path, delimiter='\t')

    # With a comma delimiter the load succeeds and equals the fixture corpus.
    loaded = load_corpus_csv('example', csv_path, delimiter=',')

    assert isinstance(loaded, Corpus)
    assert loaded == unspecified_test_corpus
コード例 #3
0
def test_stressed(csv_test_dir):
    """Check the inventory symbol and stress table loaded from stressed.txt."""
    csv_path = os.path.join(csv_test_dir, 'stressed.txt')
    annotation_types, _ = inspect_csv(csv_path, coldelim = ',')
    print(annotation_types)

    # Set the second annotation type's number handling to 'stress' before loading.
    annotation_types[1].number_behavior = 'stress'
    corpus = load_corpus_csv('stressed', csv_path, ',', annotation_types)

    assert corpus.inventory['uw'].symbol == 'uw'
    assert corpus.inventory.stresses == {
        '1': {'uw', 'iy'},
        '0': {'uw', 'iy', 'ah'},
    }
コード例 #4
0
ファイル: iogui.py プロジェクト: pedmiston/CorpusTools
    def run(self):
        """Load a corpus with the loader chosen by ``text_type`` and emit it.

        ``text_type`` and ``isDirectory`` are popped from ``self.kwargs``; the
        remaining entries are passed as keyword arguments to the selected
        loader. For 'buckeye' and 'timit' the text type is also stored in
        ``self.kwargs['dialect']``.

        The outcome is reported through signals rather than a return value:
        ``errorEncountered`` carries the exception if loading fails,
        ``finishedCancelling`` fires if ``self.stopped`` is set once loading
        has returned, and otherwise ``dataReady`` carries the loaded corpus.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(
            textType, self.kwargs['corpus_name']))
        # The format string needs a placeholder; without it the path was
        # silently dropped from the log message.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                # CSV import uses the same loader whether or not
                # isDirectory is set.
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # Fail inside the try so an unknown text type is reported via
                # errorEncountered instead of leaving `corpus` unbound below.
                raise ValueError(
                    'Unsupported text type: {}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            # Wrap unexpected errors before handing them to the signal.
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
コード例 #5
0
ファイル: iogui.py プロジェクト: adilnurimanov/CorpusTools
    def run(self):
        """Import a corpus according to ``text_type`` and signal the result.

        ``text_type`` and ``isDirectory`` are removed from ``self.kwargs``
        first; everything left is forwarded as keyword arguments to the
        selected loader. For 'buckeye' and 'timit' the text type is added
        back as ``self.kwargs['dialect']``.

        No value is returned. ``errorEncountered`` is emitted with the
        exception when loading fails, ``finishedCancelling`` when
        ``self.stopped`` is set after loading, and ``dataReady`` with the
        corpus otherwise.
        """
        time.sleep(0.1)
        textType = self.kwargs.pop('text_type')
        isDirectory = self.kwargs.pop('isDirectory')
        logging.info('Importing {} corpus named {}'.format(textType, self.kwargs['corpus_name']))
        # A '{}' placeholder is required here; the original message omitted
        # it, so the path never appeared in the log.
        logging.info('Path: {}'.format(self.kwargs['path']))
        log_annotation_types(self.kwargs['annotation_types'])
        try:
            if textType == 'spelling':
                if isDirectory:
                    corpus = load_directory_spelling(**self.kwargs)
                else:
                    corpus = load_discourse_spelling(**self.kwargs)
            elif textType == 'transcription':
                if isDirectory:
                    corpus = load_directory_transcription(**self.kwargs)
                else:
                    corpus = load_discourse_transcription(**self.kwargs)
            elif textType == 'ilg':
                if isDirectory:
                    corpus = load_directory_ilg(**self.kwargs)
                else:
                    corpus = load_discourse_ilg(**self.kwargs)
            elif textType == 'textgrid':
                if isDirectory:
                    corpus = load_directory_textgrid(**self.kwargs)
                else:
                    corpus = load_discourse_textgrid(**self.kwargs)
            elif textType == 'csv':
                # The CSV loader is used regardless of isDirectory.
                corpus = load_corpus_csv(**self.kwargs)
            elif textType in ['buckeye', 'timit']:
                self.kwargs['dialect'] = textType
                if isDirectory:
                    corpus = load_directory_multiple_files(**self.kwargs)
                else:
                    corpus = load_discourse_multiple_files(**self.kwargs)
            else:
                # Raise inside the try so an unrecognised text type goes
                # through errorEncountered rather than hitting an unbound
                # `corpus` at the final emit.
                raise ValueError(
                    'Unsupported text type: {}'.format(textType))
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            # Unexpected errors are wrapped before being emitted.
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return
        self.dataReady.emit(corpus)
コード例 #6
0
ファイル: pct_corpus.py プロジェクト: pedmiston/CorpusTools
def main():
    """Create a corpus from the CSV file named on the command line and save it.

    The CSV path is first used exactly as given; the corpus is then handed to
    ``save_binary`` under the name returned by ``path_leaf`` plus ``.corpus``.
    If that attempt raises ``FileNotFoundError``, the CSV file (and the feature
    file, when one was supplied) are looked up relative to the directory that
    contains this script, and the output path is the CSV path with its
    extension replaced by ``.corpus``.

    Raises:
        FileNotFoundError: if the script-relative fallback fails as well.
    """

    #### Parse command-line arguments
    parser = argparse.ArgumentParser(description = \
             'Phonological CorpusTools: corpus object creation CL interface')
    parser.add_argument('csv_file_name', help='Name of input CSV file')
    parser.add_argument('-f', '--feature_file_name', default = '', type=str, help='Name of input feature file')
    parser.add_argument('-d', '--delimiter', default=None, type=str, help='Character that delimits columns in the input file')
    parser.add_argument('-t', '--trans_delimiter', default=None, type=str, help='Character that delimits segments in the input file')

    args = parser.parse_args()

    ####
    # Turn escape sequences typed on the command line (e.g. "\t") into the
    # characters they denote; an unspecified or empty delimiter is passed
    # through unchanged.
    if args.delimiter:
        delimiter = codecs.getdecoder("unicode_escape")(args.delimiter)[0]
    else:
        delimiter = args.delimiter

    try: # Path usable exactly as given
        filename = path_leaf(os.path.splitext(args.csv_file_name)[0])
        corpus = load_corpus_csv(args.csv_file_name, args.csv_file_name,
                delimiter, args.trans_delimiter, annotation_types=None, feature_system_path=args.feature_file_name)
        save_binary(corpus, filename+'.corpus')
    except FileNotFoundError:
        # Fall back to paths relative to this script's directory.
        # os.path.join picks the right separator, so one attempt covers
        # both Unix and Windows.
        script_dir = os.path.dirname(os.path.realpath(__file__))
        csv_path = os.path.join(script_dir, args.csv_file_name)
        # Keep an unspecified feature file empty, as in the first attempt,
        # instead of turning it into the bare script directory.
        feature_path = args.feature_file_name
        if feature_path:
            feature_path = os.path.join(script_dir, feature_path)
        filename = os.path.splitext(csv_path)[0]
        corpus = load_corpus_csv(args.csv_file_name, csv_path,
                delimiter, args.trans_delimiter, annotation_types=None, feature_system_path=feature_path)
        save_binary(corpus, filename+'.corpus')