Esempio n. 1
0
 def __init__(self, corpus_folder, taxonomy: Taxonomy):
     """Set up the "AMI" corpus from ``corpus_folder``.

     Runs the base ``Corpus`` initializer, records the ``test_files``
     list, then loads the corpus and stores the parsed result in
     ``self.utterances``.

     Args:
         corpus_folder: Location passed to ``Corpus.__init__`` and to
             ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "AMI", corpus_folder, taxonomy)
     # Assigned before loading; presumably these identify the held-out
     # test conversations used by load_corpus/parse_corpus -- TODO confirm.
     self.test_files = ["ES2004", "ES2014", "IS1009",
                        "TS3003", "TS3007", "EN2002"]
     loaded = self.load_corpus(corpus_folder)
     self.utterances = self.parse_corpus(loaded)
 def __init__(self, midas_folder, taxonomy: Taxonomy):
     """Set up the "MIDAS" corpus from ``midas_folder``.

     Runs the base ``Corpus`` initializer, records the per-split file
     names in ``self.files``, then loads the corpus and stores the
     parsed result in ``self.utterances``.

     Args:
         midas_folder: Location passed to ``Corpus.__init__`` and to
             ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "MIDAS", midas_folder, taxonomy)
     # Split name -> file name; assigned before loading (presumably
     # read by load_corpus/parse_corpus -- TODO confirm).
     self.files = dict(train="train.txt", dev="dev.txt", test="test.txt")
     loaded = self.load_corpus(midas_folder)
     self.utterances = self.parse_corpus(loaded)
Esempio n. 3
0
 def __init__(self, corpus_folder, taxonomy: Taxonomy):
     """Set up the "AMI" corpus from ``corpus_folder``.

     Runs the base ``Corpus`` initializer, then loads the corpus and
     stores the parsed result in ``self.utterances``.

     Args:
         corpus_folder: Location passed to ``Corpus.__init__`` and to
             ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "AMI", corpus_folder, taxonomy)
     loaded = self.load_corpus(corpus_folder)
     self.utterances = self.parse_corpus(loaded)
Esempio n. 4
0
 def __init__(self, maptask_folder, taxonomy: Taxonomy):
     """Set up the "Maptask" corpus from ``maptask_folder``.

     Runs the base ``Corpus`` initializer, then loads the corpus and
     stores the parsed result in ``self.utterances``.

     Args:
         maptask_folder: Location passed to ``Corpus.__init__`` and to
             ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "Maptask", maptask_folder, taxonomy)
     loaded = self.load_corpus(maptask_folder)
     self.utterances = self.parse_corpus(loaded)
 def __init__(self, switchboard_folder, taxonomy: Taxonomy):
     """Set up the "Switchboard" corpus from ``switchboard_folder``.

     Runs the base ``Corpus`` initializer, then loads the corpus and
     stores the parsed result in ``self.utterances``.

     Args:
         switchboard_folder: Location passed to ``Corpus.__init__`` and
             to ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "Switchboard", switchboard_folder, taxonomy)
     loaded = self.load_corpus(switchboard_folder)
     self.utterances = self.parse_corpus(loaded)
Esempio n. 6
0
 def __init__(self, switchboard_folder, taxonomy: Taxonomy):
     """Set up the "Switchboard" corpus from ``switchboard_folder``.

     Runs the base ``Corpus`` initializer, records the ``test_files``
     list, then loads the corpus and stores the parsed result in
     ``self.utterances``.

     Args:
         switchboard_folder: Location passed to ``Corpus.__init__`` and
             to ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "Switchboard", switchboard_folder, taxonomy)
     # Assigned before loading; presumably these identify the held-out
     # test files used by load_corpus/parse_corpus -- TODO confirm.
     self.test_files = [
         "sw11",
         "sw12",
         "sw13",
     ]
     loaded = self.load_corpus(switchboard_folder)
     self.utterances = self.parse_corpus(loaded)
 def __init__(self, daily_dialog_folder, taxonomy: Taxonomy):
     """Set up the "DailyDialog" corpus from ``daily_dialog_folder``.

     Runs the base ``Corpus`` initializer, then loads the corpus and
     stores the parsed result in ``self.utterances``.

     Args:
         daily_dialog_folder: Location passed to ``Corpus.__init__`` and
             to ``load_corpus``.
         taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
     """
     Corpus.__init__(self, "DailyDialog", daily_dialog_folder, taxonomy)
     loaded = self.load_corpus(daily_dialog_folder)
     self.utterances = self.parse_corpus(loaded)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from corpora.corpus import Corpus

if __name__ == "__main__":
    # Command-line interface: one input path followed by one output path.
    cli = argparse.ArgumentParser(description="generate dictionary for a corpus")
    cli.add_argument(
        "parsed_document",
        help="python pickle file, containing tokens and metadata",
    )
    cli.add_argument("dictionary", help="output dictionary")
    options = cli.parse_args()

    # Step 1: read the previously parsed corpus.
    # NOTE(review): the help text describes the input as a pickle file; if
    # Corpus.load unpickles it, only trusted files should be passed -- confirm.
    print("loading corpus")
    loaded_corpus = Corpus.load(options.parsed_document)

    # Step 2: build the dictionary on the loaded corpus object.
    print("generate dictionary")
    loaded_corpus.generate_dictionary()

    # Step 3: write the dictionary to the requested output path.
    print("saving dictionary")
    loaded_corpus.save_dictionary(options.dictionary)