def __init__(self, fname):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section.

    Args:
        fname: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, fname)

    section = "DEFAULT"
    read_str = self.__getstring__

    # The option is spelled "keyword_files" while the attribute is
    # "keywords_files". The value is split on commas; entries are not
    # trimmed.
    raw_keyword_files = read_str(section, "keyword_files")
    self.keywords_files = raw_keyword_files.split(",")

    self.num_clusters = self.__getint__(section, "num_clusters")
    self.model_file = read_str(section, "word2vec_model_file")
    self.synonyms_file = read_str(section, "synonyms_file")
def __init__(self, fname):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section.

    Args:
        fname: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, fname)

    section = "DEFAULT"
    get_text = self.__getstring__
    get_number = self.__getint__

    # Option name is "keyword_files"; attribute name is "keywords_files".
    # The comma-separated value is split as-is, without trimming entries.
    keyword_files_value = get_text(section, "keyword_files")
    self.keywords_files = keyword_files_value.split(",")

    self.top_n = get_number(section, "top_n")
    self.model_file = get_text(section, "word2vec_model_file")
    self.payload_synonyms_file = get_text(section, "payload_synonyms_file")
    self.synonyms_file = get_text(section, "synonyms_file")
def __init__(self, config_file):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section.

    Args:
        config_file: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, config_file)

    section = "DEFAULT"
    read_str = self.__getstring__
    read_flag = self.__getbool__

    # Options read with the string getter.
    self.documents_folder = read_str(section, "documents_folder")
    self.file_mask = read_str(section, "file_mask")

    # The only option read with the file-path getter.
    self.processed_documents_folder = self.__getfilepath__(
        section, "processed_documents_folder"
    )

    # Options read with the boolean getter.
    self.empty_processed_documents_folder = read_flag(
        section, "empty_processed_documents_folder"
    )
    self.parse_html = read_flag(section, "parse_html")

    # Option read with the integer getter.
    self.minimum_file_size_chars = self.__getint__(
        section, "minimum_file_size_chars"
    )
def __init__(self, fname):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section.

    Args:
        fname: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, fname)

    # Option name is "keyword_files"; attribute name is "keywords_files".
    # The value is split on commas and entries are kept untrimmed.
    self.keywords_files = self.__getstring__(
        "DEFAULT", "keyword_files"
    ).split(",")
    self.top_n = self.__getint__("DEFAULT", "top_n")

    # Remaining string options as (attribute name, option name) pairs,
    # assigned in the order listed.
    string_options = (
        ("model_file", "word2vec_model_file"),
        ("payload_synonyms_file", "payload_synonyms_file"),
        ("synonyms_file", "synonyms_file"),
    )
    for attribute, option in string_options:
        setattr(self, attribute, self.__getstring__("DEFAULT", option))
def __init__(self, config_file):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section.

    Args:
        config_file: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, config_file)

    section = "DEFAULT"
    read_str = self.__getstring__
    read_int = self.__getint__

    self.processed_documents_folder = self.__getfilepath__(
        section, "processed_documents_folder"
    )
    self.file_mask = read_str(section, "file_mask")

    # Numeric options: two read with the integer getter, one with the
    # float getter.
    self.min_document_frequency = read_int(section, "min_document_frequency")
    self.max_phrase_length = read_int(section, "max_phrase_length")
    self.max_proportion_documents = self.__getfloat__(
        section, "max_proportion_documents"
    )

    self.stop_words_file = read_str(section, "stop_words_file")
    self.keywords_file = read_str(section, "keywords_file")
def __init__(self, config_file):
    """Initialise the base configuration, then read this class's settings.

    Every option is read from the "DEFAULT" section and stored on an
    attribute with the same name as the option.

    Args:
        config_file: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, config_file)

    # (option name, getter) pairs, processed in the order listed.
    options = (
        ("processed_documents_folder", self.__getfilepath__),
        ("file_mask", self.__getstring__),
        ("min_document_frequency", self.__getint__),
        ("max_phrase_length", self.__getint__),
        ("max_proportion_documents", self.__getfloat__),
        ("stop_words_file", self.__getstring__),
        ("keywords_file", self.__getstring__),
    )
    for option, getter in options:
        setattr(self, option, getter("DEFAULT", option))
def __init__(self, fname):
    """Initialise the base configuration, then read this class's settings.

    Options come from two sections: "DEFAULT" and "WORD2VEC".

    Args:
        fname: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, fname)

    # Option name is "keyword_files"; attribute name is "keywords_files".
    # The value is split on commas and entries are kept untrimmed.
    self.keywords_files = self.__getstring__("DEFAULT", "keyword_files").split(",")
    self.processed_documents_folder = self.__getfilepath__(
        "DEFAULT", "processed_documents_folder"
    )
    self.stop_words_file = self.__getstring__("DEFAULT", "stop_words_file")
    self.file_mask = self.__getstring__("DEFAULT", "file_mask")
    self.min_sentence_length_words = self.__getint__(
        "DEFAULT", "min_sentence_length_words"
    )
    self.case_sensitive = self.__getbool__("DEFAULT", "case_sensitive")

    # Word2Vec training settings
    self.model_file = self.__getstring__("WORD2VEC", "word2vec_model_file")
    self.window_size = self.__getint__("WORD2VEC", "window_size")
    self.min_word_count = self.__getint__("WORD2VEC", "min_word_count")
    self.vector_size = self.__getint__("WORD2VEC", "vector_size")
    self.workers = self.__getint__("WORD2VEC", "workers")
    # Read once; the previous version repeated this assignment verbatim.
    self.training_iterations = self.__getint__("WORD2VEC", "training_iterations")
def __init__(self, fname):
    """Initialise the base configuration, then read this class's settings.

    Options come from two sections: "DEFAULT" and "WORD2VEC".

    Args:
        fname: Forwarded unchanged to ``ConfigurationBase.__init__``
            (presumably the configuration file path -- confirm against
            the base class).
    """
    ConfigurationBase.__init__(self, fname)

    read_str = self.__getstring__
    read_int = self.__getint__

    general = "DEFAULT"
    # Option name is "keyword_files"; attribute name is "keywords_files".
    # The value is split on commas and entries are kept untrimmed.
    self.keywords_files = read_str(general, "keyword_files").split(",")
    self.processed_documents_folder = self.__getfilepath__(
        general, "processed_documents_folder"
    )
    self.stop_words_file = read_str(general, "stop_words_file")
    self.file_mask = read_str(general, "file_mask")
    self.min_sentence_length_words = read_int(
        general, "min_sentence_length_words"
    )
    self.case_sensitive = self.__getbool__(general, "case_sensitive")

    # Word2Vec training settings
    word2vec = "WORD2VEC"
    self.model_file = read_str(word2vec, "word2vec_model_file")
    self.window_size = read_int(word2vec, "window_size")
    self.min_word_count = read_int(word2vec, "min_word_count")
    self.vector_size = read_int(word2vec, "vector_size")
    self.workers = read_int(word2vec, "workers")
    # Read once; the previous version repeated this assignment verbatim.
    self.training_iterations = read_int(word2vec, "training_iterations")