def reset(self):
    global cache_dictionary
    global final_dictionary
    global documents_dictionary
    cache_dictionary = None
    final_dictionary = None
    documents_dictionary = None
    Indexer.reset()
    Writer.reset()
    Stemmer.reset()
    Reader.reset()
def load_data(self, folder_path, stem_mode):
    global documents_dictionary
    global cache_dictionary
    global final_dictionary
    # Clear any previously loaded dictionaries before loading fresh ones.
    if final_dictionary is not None and cache_dictionary is not None and documents_dictionary is not None:
        final_dictionary.clear()
        cache_dictionary.clear()
        documents_dictionary.clear()
    documents_dictionary = Writer.load_documents(folder_path, stem_mode)
    cache_dictionary = Writer.load_cache(folder_path, stem_mode)
    final_dictionary = Writer.load_final_dictionary(folder_path, stem_mode)
def run(self):
    global cache_dictionary
    global final_dictionary
    global documents_dictionary
    global stem_mode
    start_time = time.time()
    cache_dictionary = {}
    final_dictionary = {}
    documents_dictionary = {}

    # Build a list of all sub-directory paths in the corpus; pop(0) removes the corpus root itself.
    sub_dirs = [x[0] for x in os.walk(corpus_path)]
    sub_dirs.pop(0)

    files_list = []          # Accumulates the parsed files of the current part
    file_index = 1           # Index of the file currently being processed
    iterate_over_parts = 1   # Index of the part currently being filled
    next_part = int(fileNum / parts) * iterate_over_parts  # Last file index of the first part

    if thread_mode == 'on':
        # Use a ThreadPool with the number of threads taken from the config file.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=number_of_threads)
        for subdir in sub_dirs:
            textList = Reader.separate(subdir)
            files_list.extend(textList)
            if file_index == next_part:
                executor.submit(handle_files, files_list, documents_dictionary)
                files_list = []  # Start a fresh batch
                if iterate_over_parts + 1 != parts:
                    iterate_over_parts += 1
                    # Update the last file index of the next part.
                    next_part = int(fileNum / parts) * iterate_over_parts
            if file_index == fileNum:
                # Last file of the last part: submit whatever remains and stop.
                executor.submit(handle_files, files_list, documents_dictionary)
                break
            file_index += 1
        # Shut down the ThreadPool, waiting for all submitted threads to finish.
        executor.shutdown(wait=True)
    else:
        for subdir in sub_dirs:
            textList = Reader.separate(subdir)
            files_list.extend(textList)
            if file_index == next_part:
                handle_files(files_list, documents_dictionary)
                files_list = []  # Start a fresh batch
                if iterate_over_parts + 1 != parts:
                    iterate_over_parts += 1
                    # Update the last file index of the next part.
                    next_part = int(fileNum / parts) * iterate_over_parts
            if file_index == fileNum:
                # Last file of the last part: process whatever remains and stop.
                handle_files(files_list, documents_dictionary)
                break
            file_index += 1

    sub_dirs = None
    files_list = None
    Stemmer.clean_cache()

    # Merge the temporary posting files and remove them.
    final_dictionary, cache_dictionary, posting_file_size = Indexer.merge_files(documents_dictionary)

    end_time = time.time()
    total_time = end_time - start_time
    # Stemmer.write_cache()
    print("Number of documents: " + str(len(documents_dictionary)))
    print("Number of terms: " + str(len(final_dictionary)))
    print("Time: " + "{:.2f}".format(total_time) + " seconds")
    print("Time: " + "{:.2f}".format(total_time / 60) + " minutes")
    final_dictionary_file_size = sys.getsizeof(final_dictionary)
    cache_file_size = sys.getsizeof(cache_dictionary)
    print("Posting file size: " + str(posting_file_size) + " Bytes")
    print("Dictionary file size: " + str(final_dictionary_file_size) + " Bytes")
    print("Cache file size: " + str(cache_file_size) + " Bytes")
    Writer.remove_temp_file()

    # Announce to the GUI that indexing has finished.
    self.view.finished_indexing(str(len(documents_dictionary)), str(final_dictionary_file_size),
                                str(cache_file_size), str(int(total_time)),
                                str(len(final_dictionary)), str(posting_file_size), stem_mode)
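# For reference, a minimal standalone sketch of the batching arithmetic used in
# run() above: split a list into `parts` batches of int(len(items) / parts)
# items each (the last batch also absorbs the integer-division remainder) and
# submit each batch to a ThreadPoolExecutor. The names and values here are
# illustrative only and are not part of the project.
def _dispatch_in_parts(items, parts, worker, max_workers=4):
    batch_size = int(len(items) / parts)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        for part in range(parts):
            start = part * batch_size
            # The final part runs to the end of the list, picking up any remainder.
            end = (part + 1) * batch_size if part + 1 < parts else len(items)
            executor.submit(worker, items[start:end])
    # Leaving the `with` block waits for all submitted batches to finish.

# Example: _dispatch_in_parts(list(range(10)), parts=3, worker=print)
# -> batches of sizes 3, 3, and 4, matching how run() leaves the remainder
#    of the integer division to the last part.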
def fn_save_query_file_results(self, results, save_results_path):
    # Calls the writer to save the file
    Writer.save_query_file_results(results, save_results_path)

def fn_save_results(self, results, save_results_path):
    # Calls the writer to save the file
    Writer.save_regular_result(results, save_results_path)
def setDictSave(self, folder_name):
    # Save the final dictionary to disk
    Writer.save_final_dictionary(final_dictionary, folder_name)

def setCacheSave(self, folder_name):
    # Save the cache to disk
    Writer.save_cache(cache_dictionary, folder_name)

def set_documents_save(self, folder_name):
    # Save the documents dictionary to disk
    Writer.save_documents(documents_dictionary, folder_name)
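# Hypothetical usage sketch of the methods above. It assumes they belong to a
# controller class (not shown in this excerpt) and that the config globals
# (corpus_path, fileNum, parts, thread_mode, number_of_threads) are already
# set; `folder` is an illustrative path, not one the project defines.
def _example_index_and_save(controller, folder='saved_index'):
    controller.reset()                       # Clear any previous index state
    controller.run()                         # Build the index from corpus_path
    controller.setDictSave(folder)           # Persist the final dictionary
    controller.setCacheSave(folder)          # Persist the cache
    controller.set_documents_save(folder)    # Persist the documents dictionary
    # Later, reload the saved data instead of re-indexing:
    controller.load_data(folder, stem_mode='on')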