def compare_real_data_to_reconstructed_data():
    """
    Visualise up to 10 test documents (one per class) side by side with
    their reconstructions from the trained DBN.
    """
    weights = s.load(open(env_paths.get_dbn_weight_path(), "rb"))
    batches = s.load(open(env_paths.get_batches_path(train=False), "rb"))
    # Strip a possible ".0" suffix left by float-formatted batch numbers.
    class_indices = s.load(open(env_paths.get_class_indices_path(False, batches[0]).replace(".0", ""), "rb"))
    batch = batches[0]
    data = data_processing.get_bag_of_words_matrix(batch, training=False)

    # Collect the first document encountered for each class, up to 10 classes.
    class_examples = {}
    for i, idx in enumerate(class_indices):
        if idx in class_examples:
            continue
        class_examples[idx] = data[i]
        if len(class_examples) >= 10:
            break

    print(list(class_examples.keys()))

    data_points = list(class_examples.values())

    # Append a bias unit to each document and propagate it through the network.
    output_data_points = []
    for d in data_points:
        d = append(d, 1.)
        out = generate_output_data(d, weights)
        output_data_points.append(out)

    visualise_data_points(data_points, output_data_points)
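
# A minimal sketch of what generate_output_data might look like: a forward
# pass of one document through the stacked weight matrices. The sigmoid
# activation and the bias-unit convention are assumptions, not taken from
# this codebase; the caller above appends the first bias unit itself.
import numpy as np

def generate_output_data_sketch(d, weights):
    activation = d  # bag-of-words row with a trailing bias unit
    for i, w in enumerate(weights):
        activation = 1.0 / (1.0 + np.exp(-np.dot(activation, w)))  # sigmoid
        if i < len(weights) - 1:
            # Re-append the bias unit before feeding the next layer (assumed).
            activation = np.append(activation, 1.0)
    return activation  # reconstruction of the input document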
def get_class_indices(batch, training=True):
    """
    Get all class indices of the documents in a batch.

    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    """
    indices = s.load(open(env_paths.get_class_indices_path(training, batch), "rb"))
    return indices
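
# Example usage (batch number 0 is illustrative): read back the per-document
# class indices that were pickled for a test batch.
#
#     test_indices = get_class_indices(0, training=False)
#     print(test_indices[:5])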
def get_document_class(row, batch, training=True):
    """
    Get the class name of the document corresponding to a row in a batch.

    @param row: the row in the bag-of-words matrix of the batch.
    @param batch: the number of the batch.
    @param training: is this the training set or the test set.
    """
    class_indices_for_batch = s.load(open(env_paths.get_class_indices_path(training, batch), "rb"))
    class_names_for_batch = s.load(open(env_paths.get_class_names_path(training), "rb"))
    return class_names_for_batch[class_indices_for_batch[row]]
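
# Example (row and batch numbers are illustrative): map row 5 of training
# batch 2 back to its human-readable class name.
#
#     label = get_document_class(5, 2, training=True)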
def get_all_class_indices(training=True):
    """
    Get all class indices for all batches in one list.

    @param training: is this the training set or the test set.
    """
    batches = get_batch_list(training)
    indices_collected = []

    for batch in batches:
        indices_collected += list(s.load(open(env_paths.get_class_indices_path(training, int(batch)), "rb")))

    return indices_collected
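
# A small usage sketch: tally how many documents each class contributes
# across the whole data set. collections.Counter is standard library; the
# call assumes the pickled batches are already in place.
from collections import Counter

def class_distribution(training=True):
    """Return a Counter mapping class index -> number of documents."""
    return Counter(get_all_class_indices(training))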
    def __save_batch_loading_docs(self, batch_number, docs_list, docs_names, class_indices):
        """
        Save batches for the document loading process in the initialization phase.
        This is done because the full data set is too large to hold in memory.

        @param batch_number: the number of the batch.
        @param docs_list: list containing a string for each document in the batch.
        @param docs_names: list containing the name of each document, in the same order as docs_list.
        @param class_indices: list containing the class/folder index each document belongs to.
        """
        # Serialize all relevant variables
        s.dump(docs_list, open(env_paths.get_doc_list_path(self.training, batch_number), "wb"))
        s.dump(docs_names, open(env_paths.get_doc_names_path(self.training, batch_number), "wb"))
        s.dump(class_indices, open(env_paths.get_class_indices_path(self.training, batch_number), "wb"))
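        # Note: the class-indices pickle written above is the same file that
        # get_class_indices and get_document_class later read back, so the
        # batch_number here must match the batch argument on the read side.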
def save_batch(batch, batch_lbl, batchno, training):
    """Serialize a batch's bag-of-words matrix and its class labels."""
    pickle.dump(batch_lbl, open(env_paths.get_class_indices_path(training, batchno), "wb"))
    pickle.dump(batch, open(env_paths.get_bow_matrix_path(training, batchno), "wb"))
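
# Round-trip sketch (batch number 0 is illustrative): what save_batch writes,
# the loaders above read back via the matching env_paths helpers.
#
#     save_batch(bow_matrix, labels, 0, training=True)
#     restored_lbls = pickle.load(open(env_paths.get_class_indices_path(True, 0), "rb"))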