def test_code_decode_with_zeros_columns(self):
    n = 50
    m = 10
    p = 0.7  # probability that an entry is pruned (set to zero)
    mask = np.random.choice(a=[False, True], size=(n, m), p=[p, 1 - p])
    matr = np.random.randint(100, size=(n, m)) * (1 * mask)
    matr[:, 3] = 0
    matr[:, 6] = 0

    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)
    d_data, d_rev_data, d_row_index, d_rev_row_index, d_cum, d_rev_cum = \
        sparse_huffman.huffman_sparse_encoded_dict(data, row_index, cum)
    data_encoded, row_index_encoded, cum_encoded = sparse_huffman.encoded_matrix(
        data, d_data, d_rev_data, row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)

    int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, self.bit_words_machine))
    int_row_index = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(row_index_encoded, self.bit_words_machine))
    int_cum = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(cum_encoded, self.bit_words_machine))

    expected_c = len(cum)
    dense = sparse_huffman.sparsed_encoded_to_dense(
        matr.shape, int_data, int_row_index, int_cum, d_rev_data,
        d_rev_row_index, d_rev_cum, self.bit_words_machine, expected_c)

    self.assertTrue(np.all(dense == matr))
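# A toy illustration (not part of the test suite) of the CSC triple that the
# roundtrip above relies on. The exact layout produced by
# sparse_huffman.convert_dense_to_csc may differ; scipy's csc_matrix is used
# here only to show the intended meaning of data / row_index / cum.
import numpy as np
from scipy.sparse import csc_matrix

M = np.array([[0, 5, 0],
              [3, 0, 0],
              [0, 5, 0]])
S = csc_matrix(M)
print(S.data)             # [3 5 5]   non-zero values, column-major order
print(S.indices)          # [1 0 2]   row index of each stored value
print(np.diff(S.indptr))  # [1 2 0]   non-zeros per column; per the docstring
                          # below, cum holds these counts rather than the
                          # cumulative sums of classic CSC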
def do_all_for_me(matr, bit_words_machine):
    """
    Compress the matrix by calling, in order, every function needed for the
    sparse Huffman (data-only) encoding.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word

    Returns:
        matr_shape: shape of the compressed matrix
        int_data: list of integers representing the Huffman encoding of the
            data vector of the CSC representation
        d_rev_data: dict mapping encoding --> element
        row_index: vector of the row indices of the CSC representation
        cum: vector with the number of non-zero elements of each column
        expected_c: number of columns in the matrix
        min_length_encoded: minimum length of the Huffman encodings
    """
    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)
    d_data, d_rev_data = huffman_sparse_encoded_dict(data)
    data_encoded = encoded_matrix(data, d_data, d_rev_data)
    int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, bit_words_machine))
    expected_c = len(cum)
    matr_shape = matr.shape
    min_length_encoded = huffman.min_len_string_encoded(d_rev_data)
    return (matr_shape, int_data, d_rev_data, row_index, cum, expected_c,
            min_length_encoded)
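# Hypothetical usage sketch of the data-only do_all_for_me above. The size
# arithmetic is only a rough compression check, not library code; in this
# variant only the data vector is Huffman-packed, while row_index and cum
# stay as plain vectors.
import numpy as np

matr = np.random.randint(100, size=(500, 100)) * np.random.choice(
    [0, 1], size=(500, 100), p=[0.7, 0.3])
(matr_shape, int_data, d_rev_data, row_index, cum,
 expected_c, min_length_encoded) = do_all_for_me(matr, bit_words_machine=64)

dense_bits = matr.size * 64      # the matrix stored as 64-bit integers
encoded_bits = len(int_data) * 64  # the packed Huffman words for data
print(dense_bits, encoded_bits)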
def __init__(self, *args, **kwargs):
    super(SparseHuffmanOnlyDataTest, self).__init__(*args, **kwargs)
    n = 500
    m = 100
    self.input_x = np.random.randint(1000, size=(70, n))
    p = 0.7  # probability that an entry is pruned (set to zero)
    mask = np.random.choice(a=[False, True], size=(n, m), p=[p, 1 - p])
    self.matr = np.random.randint(500, size=(n, m)) * (1 * mask)

    self.data, self.row_index, self.cum = sparse_huffman.convert_dense_to_csc(
        self.matr)
    self.d_data, self.d_rev_data = sparse_huffman_only_data.huffman_sparse_encoded_dict(
        self.data)
    data_encoded = sparse_huffman_only_data.encoded_matrix(
        self.data, self.d_data, self.d_rev_data)

    self.bit_words_machine = 64
    self.int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, self.bit_words_machine))
    self.expected_c = len(self.cum)
    self.min_length_encoded = huffman.min_len_string_encoded(self.d_rev_data)
def test_dot_encode(self):
    # Compare the dot product on the Huffman-encoded matrix with numpy's dot.
    int_from_string = huffman.convert_bin_to_int(
        self.list_bin)  # build the list of 64-bit integer words
    dot_encode = huffman.dot_for_col(self.input_x, int_from_string, self.matr,
                                     self.d_rev, self.bit_words_machine,
                                     self.matr.dtype, self.min_length_encoded)
    numpy_dot = np.dot(self.input_x, self.matr)
    self.assertTrue(np.all(numpy_dot == dot_encode))
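# Illustrative sketch only: a from-scratch version of the idea the test above
# exercises, i.e. computing x @ W by decoding each Huffman-coded column on the
# fly. This is NOT huffman.dot_for_col: the real function also walks the
# packed machine-word integers (int_from_string) instead of Python bitstrings.
# All names here are local to this example.
import numpy as np

def dot_encoded_columns(x, encoded_cols, d_rev, n_rows, min_len=1):
    """x: (batch, n_rows); encoded_cols: one bitstring per column;
    d_rev: Huffman code --> matrix entry. Returns x @ W."""
    out = np.zeros((x.shape[0], len(encoded_cols)), dtype=np.int64)
    for j, bits in enumerate(encoded_cols):
        col = np.empty(n_rows, dtype=np.int64)
        start = 0
        for i in range(n_rows):
            end = start + min_len               # min_len_string_encoded plays
            while bits[start:end] not in d_rev:  # this role in the library:
                end += 1                        # skip impossibly short prefixes
            col[i] = d_rev[bits[start:end]]
            start = end
        out[:, j] = x @ col  # one column of the result per decoded column
    return out

d_rev = {"0": 0, "10": 3, "11": 7}  # toy prefix-free code
cols = ["010", "1110"]              # two encoded columns of a 2x2 matrix
x = np.array([[1, 2]])
print(dot_encoded_columns(x, cols, d_rev, n_rows=2))  # [[ 6 13]]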
def do_all_for_me(matr, bit_words_machine):
    """
    Compress the matrix by calling, in order, every function needed for the
    full sparse Huffman encoding.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word

    Returns:
        matr_shape: shape of the compressed matrix
        int_data, int_row_index, int_cum: lists of integers representing the
            Huffman encodings of the vectors of the CSC representation (here
            cum holds, for each column, the number of non-zero values;
            classic CSC would store a cumulative count instead)
        d_rev_data, d_rev_row_index, d_rev_cum: dicts mapping encoding --> element
        expected_c: number of columns in the matrix
        min_length_encoded_d/r/c: minimum length of the Huffman encodings of
            each vector
    """
    data, row_index, cum = convert_dense_to_csc(matr)
    d_data, d_rev_data, d_row_index, d_rev_row_index, d_cum, d_rev_cum = \
        huffman_sparse_encoded_dict(data, row_index, cum)
    data_encoded, row_index_encoded, cum_encoded = encoded_matrix(
        data, d_data, d_rev_data, row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)

    int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, bit_words_machine))
    int_row_index = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(row_index_encoded, bit_words_machine))
    int_cum = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(cum_encoded, bit_words_machine))

    expected_c = len(cum)
    matr_shape = matr.shape
    min_length_encoded_d = huffman.min_len_string_encoded(d_rev_data)
    min_length_encoded_r = huffman.min_len_string_encoded(d_rev_row_index)
    min_length_encoded_c = huffman.min_len_string_encoded(d_rev_cum)

    return (matr_shape, int_data, int_row_index, int_cum, d_rev_data,
            d_rev_row_index, d_rev_cum, expected_c, min_length_encoded_d,
            min_length_encoded_r, min_length_encoded_c)
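# Hedged end-to-end sketch: compress with the full do_all_for_me above, then
# rebuild the dense matrix with sparsed_encoded_to_dense (called with the same
# signature the roundtrip test uses) and verify the result.
import numpy as np

matr = np.random.randint(100, size=(50, 10)) * np.random.choice(
    [0, 1], size=(50, 10), p=[0.7, 0.3])
bits = 64
(matr_shape, int_data, int_row_index, int_cum,
 d_rev_data, d_rev_row_index, d_rev_cum, expected_c,
 min_d, min_r, min_c) = do_all_for_me(matr, bits)

dense = sparsed_encoded_to_dense(matr_shape, int_data, int_row_index, int_cum,
                                 d_rev_data, d_rev_row_index, d_rev_cum,
                                 bits, expected_c)
assert np.all(dense == matr)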