def do_all_for_me(matr, bit_words_machine):
    """
    Takes a matrix and calls all the functions needed to compress it.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word

    Returns:
        matr_shape: shape of the matrix being compressed
        int_data: list of integers representing the Huffman encoding of the
            data vector of the CSC representation
        d_rev_data: dict mapping encoding --> element
        row_index: vector of the row indices of the CSC representation
        cum: vector with the number of elements of each column
        expected_c: number of columns in the matrix
        min_length_encoded: minimum length of the Huffman encodings
    """
    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)
    d_data, d_rev_data = huffman_sparse_encoded_dict(data)
    data_encoded = encoded_matrix(data, d_data, d_rev_data)
    int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, bit_words_machine))
    expected_c = len(cum)
    matr_shape = matr.shape
    min_length_encoded = huffman.min_len_string_encoded(d_rev_data)
    return matr_shape, int_data, d_rev_data, row_index, cum, expected_c, min_length_encoded
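# Usage sketch (an illustrative assumption, not taken from the source):
# compress a small sparse matrix with do_all_for_me and unpack its results.
# `demo_matr` is a hypothetical input; numpy is assumed to be imported as np,
# and `sparse_huffman`/`huffman` to be importable as in the function above.
demo_matr = np.zeros((6, 4), dtype=int)
demo_matr[0, 1] = 7
demo_matr[3, 2] = 7
demo_matr[5, 0] = 3
demo_matr[2, 3] = 9

(matr_shape, int_data, d_rev_data, row_index, cum,
 expected_c, min_length_encoded) = do_all_for_me(demo_matr, 64)

print(matr_shape)          # (6, 4): shape of the original matrix
print(expected_c)          # number of columns seen by the CSC representation
print(min_length_encoded)  # length of the shortest Huffman code among the data symbols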
def __init__(self, *args, **kwargs):
    super(SparseHuffmanOnlyDataTest, self).__init__(*args, **kwargs)
    n = 500
    m = 100
    self.input_x = np.random.randint(1000, size=(70, n))
    p = 0.7  # probability that an entry is pruned (set to zero)
    mask = np.random.choice(a=[False, True], size=(n, m), p=[p, 1 - p])
    # Sparse test matrix: random values masked so roughly 70% of the entries are zero.
    self.matr = np.random.randint(500, size=(n, m)) * (1 * mask)
    self.data, self.row_index, self.cum = sparse_huffman.convert_dense_to_csc(
        self.matr)
    self.d_data, self.d_rev_data = sparse_huffman_only_data.huffman_sparse_encoded_dict(
        self.data)
    data_encoded = sparse_huffman_only_data.encoded_matrix(
        self.data, self.d_data, self.d_rev_data)
    self.bit_words_machine = 64
    self.int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, self.bit_words_machine))
    self.expected_c = len(self.cum)
    self.min_length_encoded = huffman.min_len_string_encoded(
        self.d_rev_data)
def do_all_for_me(matr, bit_words_machine):
    """
    Takes a matrix and calls all the functions needed to compress it.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word

    Returns:
        matr_shape: shape of the matrix being compressed
        int_data, int_row_index, int_cum: lists of integers representing the
            Huffman encoding of the vectors of the CSC representation (cum
            contains, for each column, the number of non-zero values; usually
            a cumulative value is used)
        d_rev_data, d_rev_row_index, d_rev_cum: dicts mapping encoding --> element
        expected_c: number of columns in the matrix
        min_length_encoded_d/r/c: minimum length of the Huffman encodings of
            each vector
    """
    data, row_index, cum = convert_dense_to_csc(matr)
    d_data, d_rev_data, d_row_index, d_rev_row_index, d_cum, d_rev_cum = huffman_sparse_encoded_dict(
        data, row_index, cum)
    data_encoded, row_index_encoded, cum_encoded = encoded_matrix(
        data, d_data, d_rev_data, row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)
    int_data = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(data_encoded, bit_words_machine))
    int_row_index = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(row_index_encoded, bit_words_machine))
    int_cum = huffman.convert_bin_to_int(
        huffman.make_words_list_to_int(cum_encoded, bit_words_machine))
    expected_c = len(cum)
    matr_shape = matr.shape
    min_length_encoded_c = huffman.min_len_string_encoded(d_rev_cum)
    min_length_encoded_d = huffman.min_len_string_encoded(d_rev_data)
    min_length_encoded_r = huffman.min_len_string_encoded(d_rev_row_index)
    return (matr_shape, int_data, int_row_index, int_cum, d_rev_data,
            d_rev_row_index, d_rev_cum, expected_c, min_length_encoded_d,
            min_length_encoded_r, min_length_encoded_c)
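# Usage sketch (an illustrative assumption, not taken from the source): run the
# full pipeline, which Huffman-encodes all three CSC vectors (data, row_index,
# cum). `weights` is a hypothetical pruned weight matrix; 64 stands for a
# 64-bit machine word; numpy is assumed to be imported as np.
weights = np.random.randint(10, size=(8, 5))
weights[weights < 7] = 0  # prune most entries so the matrix is actually sparse

(matr_shape, int_data, int_row_index, int_cum,
 d_rev_data, d_rev_row_index, d_rev_cum,
 expected_c,
 min_length_encoded_d, min_length_encoded_r,
 min_length_encoded_c) = do_all_for_me(weights, 64)

# Each int_* list holds machine-word-sized integers packing the Huffman bit
# strings; each d_rev_* dict maps a code string back to its original symbol.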
def __init__(self, *args, **kwargs):
    super(HuffmanTest, self).__init__(*args, **kwargs)
    self.input_x = np.random.randint(1000, size=(1000, 500))
    self.matr = np.random.randint(500, size=(500, 250))
    # Build the Huffman codebook from the symbol frequencies of the matrix.
    symb2freq = huffman.dict_elem_freq(self.matr)
    e = huffman.encode(symb2freq)
    self.d_rev = huffman.reverse_elements_list_to_dict(e)  # encoding --> element
    self.d = dict(e)                                       # element --> encoding
    self.encoded = huffman.matrix_with_code(self.matr, self.d, self.d_rev)
    self.bit_words_machine = 64
    # Pack the encoded bit strings into machine-word-sized chunks.
    self.list_bin = huffman.make_words_list_to_int(self.encoded,
                                                   self.bit_words_machine)
    self.min_length_encoded = huffman.min_len_string_encoded(self.d_rev)
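# Rough size-comparison sketch (an assumption, not part of the test): it reuses
# only the huffman helpers called in the setup above and ignores the codebook
# overhead. Each entry of `list_bin` is a bit string of machine-word length, so
# the packed payload occupies about len(list_bin) * 64 bits, which can be
# compared against the dense integer storage of the matrix.
matr = np.random.randint(500, size=(500, 250))
symb2freq = huffman.dict_elem_freq(matr)
e = huffman.encode(symb2freq)
d_rev = huffman.reverse_elements_list_to_dict(e)
d = dict(e)
encoded = huffman.matrix_with_code(matr, d, d_rev)
list_bin = huffman.make_words_list_to_int(encoded, 64)

encoded_bits = len(list_bin) * 64              # packed machine words
dense_bits = matr.size * matr.itemsize * 8     # dense integer storage
print(encoded_bits / dense_bits)               # ratio of encoded payload to dense storage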