def get_all_document_names(training=True):
    """
    Collect the document names of every batch into one flat list.

    The batches are those returned by get_batch_list(training); the names of
    each batch are loaded from the path given by env_paths.get_doc_names_path
    and appended in batch order.

    @param training: is this the training set or the test set.
    @return: list with the document names of all batches.
    """
    doc_names_collected = []

    for batch in get_batch_list(training):
        names_path = env_paths.get_doc_names_path(training, int(batch))
        # Use a context manager so the file handle is closed after each batch
        # instead of being left open until garbage collection.
        with open(names_path, "rb") as names_file:
            doc_names_collected.extend(s.load(names_file))

    return doc_names_collected
def get_all_document_names(training=True):
    """
    Collect the document names of every batch into one flat list.

    The batches are those returned by get_batch_list(training); the names of
    each batch are loaded from the path given by env_paths.get_doc_names_path
    and appended in batch order.

    @param training: is this the training set or the test set.
    @return: list with the document names of all batches.
    """
    doc_names_collected = []

    for batch in get_batch_list(training):
        names_path = env_paths.get_doc_names_path(training, int(batch))
        # Use a context manager so the file handle is closed after each batch
        # instead of being left open until garbage collection.
        with open(names_path, "rb") as names_file:
            doc_names_collected.extend(s.load(names_file))

    return doc_names_collected
def get_document_names(batch, training=True):
    """
    Get the document names stored for a single batch.

    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    @return: the object loaded from the batch's document-names file
             (presumably a sequence of names - confirm against the writer).
    """
    # Context manager guarantees the file handle is closed once loading is done.
    with open(env_paths.get_doc_names_path(training, batch), "rb") as names_file:
        names = s.load(names_file)
    return names
def get_document_names(batch, training=True):
    """
    Get the document names stored for a single batch.

    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    @return: the object loaded from the batch's document-names file
             (presumably a sequence of names - confirm against the writer).
    """
    # Context manager guarantees the file handle is closed once loading is done.
    with open(env_paths.get_doc_names_path(training, batch), "rb") as names_file:
        names = s.load(names_file)
    return names
def get_document_name(row, batch, training=True):
    """
    The name of the document corresponding to a row
    in a batch.

    Note that the whole names file of the batch is loaded on every call.

    @param row: row in the bag of words matrix in batch.
    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    @return: the entry at index `row` of the batch's loaded document names.
    """
    # Close the file before indexing so the handle is never leaked, even if
    # `row` is out of range and the lookup raises.
    with open(env_paths.get_doc_names_path(training, batch), "rb") as names_file:
        names = s.load(names_file)
    return names[row]
def get_document_name(row, batch, training=True):
    """
    The name of the document corresponding to a row
    in a batch.

    Note that the whole names file of the batch is loaded on every call.

    @param row: row in the bag of words matrix in batch.
    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    @return: the entry at index `row` of the batch's loaded document names.
    """
    # Close the file before indexing so the handle is never leaked, even if
    # `row` is out of range and the lookup raises.
    with open(env_paths.get_doc_names_path(training, batch), "rb") as names_file:
        names = s.load(names_file)
    return names[row]
    def __save_batch_loading_docs(self, batch_number, docs_list, docs_names, class_indices):
        """
        Save batches for the document loading process in the initialization phase. This is done due to vast sizes
        of data - lack of memory.

        Three files are written, one per argument, at the paths env_paths returns for
        (self.training, batch_number).

        @param batch_number: Identifier of the batch; used to build the output file paths.
        @param docs_list: List containing a string for each document in the batch.
        @param docs_names: List containing the names of each document in the same order as the docs_list.
        @param class_indices: List containing which class/folder each document belongs to.
        """
        # Serialize all relevant variables. Each file is opened in a context manager so it is
        # flushed and closed immediately rather than whenever the handle is garbage collected.
        with open(env_paths.get_doc_list_path(self.training, batch_number), "wb") as docs_file:
            s.dump(docs_list, docs_file)
        with open(env_paths.get_doc_names_path(self.training, batch_number), "wb") as names_file:
            s.dump(docs_names, names_file)
        with open(env_paths.get_class_indices_path(self.training, batch_number), "wb") as indices_file:
            s.dump(class_indices, indices_file)
    def __save_batch_loading_docs(self, batch_number, docs_list, docs_names, class_indices):
        """
        Save batches for the document loading process in the initialization phase. This is done due to vast sizes
        of data - lack of memory.

        Three files are written, one per argument, at the paths env_paths returns for
        (self.training, batch_number).

        @param batch_number: Identifier of the batch; used to build the output file paths.
        @param docs_list: List containing a string for each document in the batch.
        @param docs_names: List containing the names of each document in the same order as the docs_list.
        @param class_indices: List containing which class/folder each document belongs to.
        """
        # Serialize all relevant variables. Each file is opened in a context manager so it is
        # flushed and closed immediately rather than whenever the handle is garbage collected.
        with open(env_paths.get_doc_list_path(self.training, batch_number), "wb") as docs_file:
            s.dump(docs_list, docs_file)
        with open(env_paths.get_doc_names_path(self.training, batch_number), "wb") as names_file:
            s.dump(docs_names, names_file)
        with open(env_paths.get_class_indices_path(self.training, batch_number), "wb") as indices_file:
            s.dump(class_indices, indices_file)