def __init__(self, fname):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        section = "DEFAULT"

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        raw_keyword_files = self.__getstring__(section, "keyword_files")
        self.keywords_files = raw_keyword_files.split(",")

        self.num_clusters = self.__getint__(section, "num_clusters")
        self.model_file = self.__getstring__(section, "word2vec_model_file")
        self.synonyms_file = self.__getstring__(section, "synonyms_file")
Beispiel #2
0
    def __init__(self, fname):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        section = "DEFAULT"

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        raw_keyword_files = self.__getstring__(section, "keyword_files")
        self.keywords_files = raw_keyword_files.split(",")

        self.num_clusters = self.__getint__(section, "num_clusters")
        self.model_file = self.__getstring__(section, "word2vec_model_file")
        self.synonyms_file = self.__getstring__(section, "synonyms_file")
    def __init__(self, fname):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        section = "DEFAULT"

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        raw_keyword_files = self.__getstring__(section, "keyword_files")
        self.keywords_files = raw_keyword_files.split(",")

        self.top_n = self.__getint__(section, "top_n")
        self.model_file = self.__getstring__(section, "word2vec_model_file")
        self.payload_synonyms_file = self.__getstring__(
            section, "payload_synonyms_file"
        )
        self.synonyms_file = self.__getstring__(section, "synonyms_file")
Beispiel #4
0
    def __init__(self, config_file):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            config_file: Forwarded unchanged to
                ``ConfigurationBase.__init__``.
        """
        ConfigurationBase.__init__(self, config_file)

        section = "DEFAULT"

        # Input/output locations. Only the processed folder goes through
        # __getfilepath__; the other two are read as plain strings.
        self.documents_folder = self.__getstring__(section, "documents_folder")
        self.file_mask = self.__getstring__(section, "file_mask")
        self.processed_documents_folder = self.__getfilepath__(
            section, "processed_documents_folder"
        )

        # Processing switches and thresholds.
        self.empty_processed_documents_folder = self.__getbool__(
            section, "empty_processed_documents_folder"
        )
        self.parse_html = self.__getbool__(section, "parse_html")
        self.minimum_file_size_chars = self.__getint__(
            section, "minimum_file_size_chars"
        )
    def __init__(self, fname):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        section = "DEFAULT"

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        raw_keyword_files = self.__getstring__(section, "keyword_files")
        self.keywords_files = raw_keyword_files.split(",")

        self.top_n = self.__getint__(section, "top_n")
        self.model_file = self.__getstring__(section, "word2vec_model_file")
        self.payload_synonyms_file = self.__getstring__(
            section, "payload_synonyms_file"
        )
        self.synonyms_file = self.__getstring__(section, "synonyms_file")
    def __init__(self, config_file):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            config_file: Forwarded unchanged to
                ``ConfigurationBase.__init__``.
        """
        ConfigurationBase.__init__(self, config_file)

        section = "DEFAULT"

        # Where the documents live and which files to pick up.
        self.processed_documents_folder = self.__getfilepath__(
            section, "processed_documents_folder"
        )
        self.file_mask = self.__getstring__(section, "file_mask")

        # Numeric thresholds; note the proportion is read as a float.
        self.min_document_frequency = self.__getint__(
            section, "min_document_frequency"
        )
        self.max_phrase_length = self.__getint__(section, "max_phrase_length")
        self.max_proportion_documents = self.__getfloat__(
            section, "max_proportion_documents"
        )

        # Auxiliary word-list files.
        self.stop_words_file = self.__getstring__(section, "stop_words_file")
        self.keywords_file = self.__getstring__(section, "keywords_file")
Beispiel #7
0
    def __init__(self, config_file):
        """Initialise the base configuration and cache this object's settings.

        Every option is read from the "DEFAULT" section.

        Args:
            config_file: Forwarded unchanged to
                ``ConfigurationBase.__init__``.
        """
        ConfigurationBase.__init__(self, config_file)

        section = "DEFAULT"

        # Where the documents live and which files to pick up.
        self.processed_documents_folder = self.__getfilepath__(
            section, "processed_documents_folder"
        )
        self.file_mask = self.__getstring__(section, "file_mask")

        # Numeric thresholds; note the proportion is read as a float.
        self.min_document_frequency = self.__getint__(
            section, "min_document_frequency"
        )
        self.max_phrase_length = self.__getint__(section, "max_phrase_length")
        self.max_proportion_documents = self.__getfloat__(
            section, "max_proportion_documents"
        )

        # Auxiliary word-list files.
        self.stop_words_file = self.__getstring__(section, "stop_words_file")
        self.keywords_file = self.__getstring__(section, "keywords_file")
    def __init__(self, fname):
        """Initialise the base configuration and cache the training settings.

        Corpus and preprocessing options are read from the "DEFAULT"
        section; Word2Vec training options are read from the "WORD2VEC"
        section. Each option is read exactly once.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        self.keywords_files = self.__getstring__("DEFAULT", "keyword_files").split(",")

        self.processed_documents_folder = self.__getfilepath__("DEFAULT", "processed_documents_folder")
        self.stop_words_file = self.__getstring__("DEFAULT", "stop_words_file")
        self.file_mask = self.__getstring__("DEFAULT", "file_mask")
        self.min_sentence_length_words = self.__getint__("DEFAULT", "min_sentence_length_words")
        self.case_sensitive = self.__getbool__("DEFAULT", "case_sensitive")

        # Word2Vec training settings
        self.model_file = self.__getstring__("WORD2VEC", "word2vec_model_file")
        self.window_size = self.__getint__("WORD2VEC", "window_size")
        self.min_word_count = self.__getint__("WORD2VEC", "min_word_count")
        self.vector_size = self.__getint__("WORD2VEC", "vector_size")
        self.workers = self.__getint__("WORD2VEC", "workers")
        self.training_iterations = self.__getint__("WORD2VEC", "training_iterations")
    def __init__(self, fname):
        """Initialise the base configuration and cache the training settings.

        Corpus and preprocessing options are read from the "DEFAULT"
        section; Word2Vec training options are read from the "WORD2VEC"
        section. Each option is read exactly once.

        Args:
            fname: Forwarded unchanged to ``ConfigurationBase.__init__``
                (presumably the configuration file to load -- confirm
                against the base class).
        """
        ConfigurationBase.__init__(self, fname)

        # The option is named "keyword_files" while the attribute is
        # "keywords_files"; its value is one comma-separated string.
        self.keywords_files = self.__getstring__("DEFAULT",
                                                 "keyword_files").split(",")

        self.processed_documents_folder = self.__getfilepath__(
            "DEFAULT", "processed_documents_folder")
        self.stop_words_file = self.__getstring__("DEFAULT", "stop_words_file")
        self.file_mask = self.__getstring__("DEFAULT", "file_mask")
        self.min_sentence_length_words = self.__getint__(
            "DEFAULT", "min_sentence_length_words")
        self.case_sensitive = self.__getbool__("DEFAULT", "case_sensitive")

        # Word2Vec training settings
        self.model_file = self.__getstring__("WORD2VEC", "word2vec_model_file")
        self.window_size = self.__getint__("WORD2VEC", "window_size")
        self.min_word_count = self.__getint__("WORD2VEC", "min_word_count")
        self.vector_size = self.__getint__("WORD2VEC", "vector_size")
        self.workers = self.__getint__("WORD2VEC", "workers")
        self.training_iterations = self.__getint__("WORD2VEC",
                                                   "training_iterations")