Ejemplo n.º 1
0
def txt_analyzer(txt_file, number_of_cat):
    """Run the requirement-text analysis pipeline for one text file.

    The function drives a ``TxtRequirementProcessor`` through these steps:
    split the path, load the raw text, remove stopwords, lemmatize, build
    word-type categories, write them to temporary files inside a temporary
    directory, build a categorized corpus from that directory, then tabulate
    and plot the results.

    The temporary files and the temporary directory are removed in a
    ``finally`` clause, so they are cleaned up even when a later step
    (for example tabulation or plotting) raises.

    Args:
        txt_file: Path of the text file to analyze; passed to the processor
            constructor and to ``split_path``.
        number_of_cat: Passed through to the processor for category
            selection, tabulation and plotting (presumably the number of
            categories to keep -- confirm against the processor).

    Returns:
        None. Results are produced by the processor's tabulate/plot methods.
    """
    trp = TxtRequirementProcessor(txt_file, number_of_cat)

    # Load and normalise the text.
    corpus_root, fileid = trp.split_path(txt_file)
    raw_text = trp.search_fileid(corpus_root, fileid)
    words, stopwords = trp.list_raw_txt_file(raw_text, STOPWORDS)
    filtered_words = trp.split_stopwords(words, stopwords)
    lemmas = trp.lemmatize_text_as_list(filtered_words)

    # NOTE(review): ``create_temp_directory`` is accessed without a call, so
    # it is presumably a property that creates the directory -- confirm.
    # The second element (a status flag) is not needed here.
    categories_directory, _, tmp_root, tmp_folderid = trp.create_temp_directory

    # ``None`` means "no temporary files were created yet"; it lets the
    # cleanup below skip file deletion if we fail before creating them.
    category_tmp_file_list = None
    try:
        wordtype_categories = trp.list_wordtypes_from_lemmatized_list(
            lemmas, number_of_cat)
        category_tmp_file_list, _ = trp.create_temp_files_named_by_wordtypes(
            wordtype_categories, categories_directory)
        category_tmp_file_content, _ = trp.assign_temp_files_txt_content(
            wordtype_categories, lemmas)
        category_tmp_file_list, _ = trp.assign_temp_content_to_temp_files(
            category_tmp_file_content, category_tmp_file_list)

        reader, _ = trp.create_categorized_corpus(categories_directory)
        trp.tabulate_categorized_words(reader, number_of_cat)
        trp.plot_txt_results(lemmas, number_of_cat)
    finally:
        # Same cleanup order as before (files first, then the directory),
        # but now guaranteed to run on failure as well.
        try:
            if category_tmp_file_list is not None:
                trp.delete_temporary_files(category_tmp_file_list)
        finally:
            trp.remove_categories_directory(categories_directory,
                                            tmp_root,
                                            tmp_folderid)
class TestTxtRequirementProcessor(TestCase):
    """Specs for ``TxtRequirementProcessor``.

    Each spec compares a processor result against a fixture attribute of a
    ``Variables`` instance using the ``|should| equal_to(...)`` matcher form.
    """

    def setUp(self):
        # Fresh fixtures and a fresh processor for every spec.
        self.v = Variables()
        self.trp = TxtRequirementProcessor(self.v.path, self.v.n_cat)

    def it_receives_a_path(self):
        # The constructor argument is exposed as ``txt_file``.
        stored_path = self.trp.txt_file
        stored_path |should| equal_to(self.v.path)

    def it_splits_corpus_root_and_fileid_from_path(self):
        root_and_fileid = self.trp.split_path(self.v.path)
        root_and_fileid |should| equal_to((self.v.root, self.v.fileid))

    def it_searches_fileid_in_a_folder_and_returns_its_string_content(self):
        content = self.trp.search_fileid(self.v.corpus_root,
                                         self.v.spec_fileid)
        content |should| equal_to(self.v.concise_raw_text)

    def it_lists_raw_text_and_stopwords_in_a_tuple(self):
        listed = self.trp.list_raw_txt_file(self.v.concise_raw_text,
                                            self.v.concise_stopwords)
        listed |should| equal_to((self.v.concise_text_list,
                                  self.v.concise_stopwords_list))

    def it_splits_stopwords_and_numbers_from_text_as_a_list(self):
        remaining = self.trp.split_stopwords(self.v.concise_text_list,
                                             self.v.concise_stopwords_list)
        remaining |should| equal_to(self.v.text_list_free_from_stopwords)

    def it_lists_lemmatized_verb_noun_and_adjective(self):
        lemmas = self.trp.lemmatize_text_as_list(
            self.v.text_list_free_from_stopwords)
        lemmas |should| equal_to(self.v.lemmatized_list)

    def it_creates_temporary_directory(self):
        # Index 1 of ``create_temp_directory`` is the status flag checked here.
        created_flag = self.trp.create_temp_directory[1]
        created_flag |should| equal_to(True)

    def it_creates_wordtypes_from_lemmatized_list_of_words(self):
        wordtypes = self.trp.list_wordtypes_from_lemmatized_list(
            self.v.lemmatized_list, self.v.n_cat)
        wordtypes |should| equal_to(self.v.wordtypes_list)

    def it_creates_temporary_files_named_by_wordtypes_inside_temporary_directory(self):
        outcome = self.trp.create_temp_files_named_by_wordtypes(
            self.v.wordtypes_list, self.v.temporary_directory)
        # Only the second element (status flag) is asserted; the spec
        # expects it to be False.
        outcome[1] |should| equal_to(False)

    def it_assigns_temporary_file_content_by_wordtype_to_a_list(self):
        outcome = self.trp.assign_temp_files_txt_content(
            self.v.wordtypes_list, self.v.lemmatized_list)
        outcome[1] |should| equal_to(True)

    def it_assigns_temporary_content_to_temporary_files(self):
        # First element of the creation result is the list of temp files.
        tmp_files = self.trp.create_temp_files_named_by_wordtypes(
            self.v.wordtypes_list, self.v.temporary_directory)[0]
        outcome = self.trp.assign_temp_content_to_temp_files(
            self.v.categories_content, tmp_files)
        outcome[1] |should| equal_to(True)

    def it_creates_categorized_plaintextcorpusreader(self):
        directory = self.trp.create_temp_directory[0]
        outcome = self.trp.create_categorized_corpus(directory)
        outcome[1] |should| equal_to(True)

    def it_deletes_all_wordtypes_temporary_files(self):
        tmp_files = self.trp.create_temp_files_named_by_wordtypes(
            self.v.wordtypes_list, self.v.temporary_directory)[0]
        deleted = self.trp.delete_temporary_files(tmp_files)
        deleted |should| equal_to(True)

    def it_checks_temporary_directory_was_removed_from_tmp_folder(self):
        # ``create_temp_directory`` is read twice, in this order, exactly as
        # the spec was originally written.
        categories_directory = self.trp.create_temp_directory[0]
        tmp_folderid = self.trp.create_temp_directory[3]
        removed = self.trp.remove_categories_directory(
            categories_directory, self.v.temporary_directory, tmp_folderid)
        # NOTE(review): this line originally carried a trailing "# True"
        # comment, so the expected value may be meant to be True -- confirm.
        removed |should| equal_to(False)